async def test_conflict_logs(faker, namespace, caplog, api_client):
    caplog.set_level(logging.DEBUG, logger=__name__)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name
    password1 = faker.password(length=12)
    password2 = faker.password(length=12)
    await core.create_namespaced_secret(
        namespace=ns,
        body=V1Secret(
            data={"password": b64encode(password1)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        ),
    )
    await call_kubeapi(
        core.create_namespaced_secret,
        logger,
        continue_on_conflict=True,
        namespace=ns,
        body=V1Secret(
            data={"password": b64encode(password2)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        ),
    )
    secret = await core.read_namespaced_secret(name=name, namespace=ns)
    assert b64decode(secret.data["password"]) == password1
    assert (
        f"Failed creating V1Secret '{ns}/{name}' because it already exists. Continuing."
        in caplog.messages
    )
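
Several snippets in this listing exercise a ``call_kubeapi`` helper. The sketch below shows one way such a wrapper could behave so that it produces the log messages asserted in these tests; the signature and the 409/404 handling are assumptions for illustration, not the operator's actual implementation.

import logging
from typing import Any, Awaitable, Callable, Optional

from kubernetes_asyncio.client import ApiException


async def call_kubeapi_sketch(
    method: Callable[..., Awaitable[Any]],
    logger: logging.Logger,
    *,
    continue_on_absence: bool = False,
    continue_on_conflict: bool = False,
    namespace: Optional[str] = None,
    body: Optional[Any] = None,
    **kwargs: Any,
) -> Optional[Any]:
    # Only forward the optional keyword arguments that were actually given,
    # since cluster-scoped calls (e.g. ``create_persistent_volume``) take no
    # ``namespace`` argument.
    if namespace is not None:
        kwargs["namespace"] = namespace
    if body is not None:
        kwargs["body"] = body
    try:
        return await method(**kwargs)
    except ApiException as e:
        obj_name = kwargs.get("name") or getattr(
            getattr(body, "metadata", None), "name", None
        )
        obj_kind = type(body).__name__ if body is not None else "resource"
        if e.status == 409 and continue_on_conflict:
            logger.info(
                "Failed creating %s '%s/%s' because it already exists. "
                "Continuing.", obj_kind, namespace, obj_name,
            )
            return None
        if e.status == 404 and continue_on_absence:
            logger.info(
                "Failed deleting '%s/%s' because it doesn't exist. Continuing.",
                namespace, obj_name,
            )
            return None
        raise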
Example #2
async def create_services(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    http_port: int,
    postgres_port: int,
    transport_port: int,
    dns_record: Optional[str],
    logger: logging.Logger,
) -> None:
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        await call_kubeapi(
            core.create_namespaced_service,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=get_data_service(owner_references, name, labels, http_port,
                                  postgres_port, dns_record),
        )
        await call_kubeapi(
            core.create_namespaced_service,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=get_discovery_service(owner_references, name, labels,
                                       transport_port),
        )
Example #3
async def create_debug_volume(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    """
    Creates a ``PersistentVolume`` and ``PersistentVolumeClaim`` to be used for
    exporting Java Heapdumps from CrateDB. The volume can be configured
    with the :attr:`~crate.operator.config.Config.DEBUG_VOLUME_SIZE` and
    :attr:`~crate.operator.config.Config.DEBUG_VOLUME_STORAGE_CLASS` settings.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        await call_kubeapi(
            core.create_persistent_volume,
            logger,
            continue_on_conflict=True,
            body=get_debug_persistent_volume(owner_references, namespace, name,
                                             labels),
        )
        await call_kubeapi(
            core.create_namespaced_persistent_volume_claim,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=get_debug_persistent_volume_claim(owner_references, name,
                                                   labels),
        )
    async def test_create(self, faker, namespace, cleanup_handler):
        core = CoreV1Api()
        name = faker.domain_word()

        # Clean up persistent volume after the test
        cleanup_handler.append(
            core.delete_persistent_volume(
                name=f"temp-pv-{namespace.metadata.name}-{name}"))

        pv, pvc = await asyncio.gather(*create_debug_volume(
            core,
            None,
            namespace.metadata.name,
            name,
            {},
            logging.getLogger(__name__),
        ))
        await assert_wait_for(
            True,
            self.does_pv_exist,
            core,
            f"temp-pv-{namespace.metadata.name}-{name}",
        )
        await assert_wait_for(
            True,
            self.does_pvc_exist,
            core,
            namespace.metadata.name,
            f"local-resource-{name}",
        )
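
For reference, here is a hedged sketch of what a factory like ``get_debug_persistent_volume`` could produce, assuming the ``temp-pv-<namespace>-<name>`` naming visible in the test above and the two ``Config`` settings mentioned in the docstring; the backing ``hostPath`` and the ``config`` import path are illustrative assumptions, not the operator's implementation.

from typing import Dict, List, Optional

from kubernetes_asyncio.client import (
    V1HostPathVolumeSource,
    V1ObjectMeta,
    V1OwnerReference,
    V1PersistentVolume,
    V1PersistentVolumeSpec,
)

from crate.operator.config import config  # assumed singleton, see docstring


def get_debug_persistent_volume_sketch(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: Dict[str, str],
) -> V1PersistentVolume:
    return V1PersistentVolume(
        metadata=V1ObjectMeta(
            name=f"temp-pv-{namespace}-{name}",
            labels=labels,
            owner_references=owner_references,
        ),
        spec=V1PersistentVolumeSpec(
            access_modes=["ReadWriteOnce"],
            capacity={"storage": config.DEBUG_VOLUME_SIZE},
            storage_class_name=config.DEBUG_VOLUME_STORAGE_CLASS,
            # The backing store is an assumption made for this sketch only.
            host_path=V1HostPathVolumeSource(path=f"/tmp/debug-{namespace}-{name}"),
        ),
    )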
async def test_conflict_raises(faker, namespace, api_client):
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name
    password1 = faker.password(length=12)
    password2 = faker.password(length=12)
    await core.create_namespaced_secret(
        namespace=ns,
        body=V1Secret(
            data={"password": b64encode(password1)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        ),
    )
    with pytest.raises(ApiException):
        await call_kubeapi(
            core.create_namespaced_secret,
            logger,
            namespace=ns,
            body=V1Secret(
                data={"password": b64encode(password2)},
                metadata=V1ObjectMeta(name=name),
                type="Opaque",
            ),
        )
    secret = await core.read_namespaced_secret(name=name, namespace=ns)
    assert b64decode(secret.data["password"]) == password1
Example #6
async def namespace(faker, api_client) -> V1Namespace:
    core = CoreV1Api(api_client)
    name = faker.uuid4()
    await assert_wait_for(False, does_namespace_exist, core, name)
    ns: V1Namespace = await core.create_namespace(body=V1Namespace(
        metadata=V1ObjectMeta(name=name)))
    await assert_wait_for(True, does_namespace_exist, core, name)
    yield ns
    await core.delete_namespace(name=ns.metadata.name, body=V1DeleteOptions())
async def test_absent_raises(faker, namespace, api_client):
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    with pytest.raises(ApiException):
        await call_kubeapi(
            core.delete_namespaced_secret,
            logger,
            namespace=namespace.metadata.name,
            name=name,
            body=V1DeleteOptions(),
        )
    async def test_create(self, faker, namespace):
        core = CoreV1Api()
        name = faker.domain_word()
        await create_sql_exporter_config(
            core, None, namespace.metadata.name, name, {},
            logging.getLogger(__name__),
        )
        await assert_wait_for(
            True,
            self.does_configmap_exist,
            core,
            namespace.metadata.name,
            f"crate-sql-exporter-{name}",
        )
Example #9
async def reset_allocation(namespace: str, pod_name: str, has_ssl: bool) -> None:
    """
    Reset all temporary node deallocations to none.

    .. note::

       Ideally, we'd be using the system user to reset the allocation
       exclusions. However, `due to a bug
       <https://github.com/crate/crate/pull/10083>`_, this isn't possible in
       CrateDB <= 4.1.6. We therefore fall back to the "exec-in-container"
       approach that we also use during cluster bootstrapping.

    :param namespace: The Kubernetes namespace for the CrateDB cluster.
    :param pod_name: The pod name of one of the eligible master nodes in
        the cluster. Used to ``exec`` into.
    :param has_ssl: When ``True``, ``crash`` will establish a connection to
        the CrateDB cluster from inside the ``crate`` container using SSL/TLS.
        This must match how the cluster is configured, otherwise ``crash``
        won't be able to connect, since non-encrypted connections are forbidden
        when SSL/TLS is enabled, and encrypted connections aren't possible when
        no SSL/TLS is configured.
    """

    # async with conn_factory() as conn:
    #     async with conn.cursor() as cursor:
    #         await cursor.execute(
    #             """
    #             RESET GLOBAL "cluster.routing.allocation.exclude._name"
    #             """,
    #         )

    scheme = "https" if has_ssl else "http"
    command_grant = [
        "crash",
        "--verify-ssl=false",
        f"--host={scheme}://localhost:4200",
        "-c",
        'RESET GLOBAL "cluster.routing.allocation.exclude._name";',
    ]
    async with WsApiClient() as ws_api_client:
        core_ws = CoreV1Api(ws_api_client)
        await core_ws.connect_get_namespaced_pod_exec(
            namespace=namespace,
            name=pod_name,
            command=command_grant,
            container="crate",
            stderr=True,
            stdin=False,
            stdout=True,
            tty=False,
        )
Example #10
    async def handle(  # type: ignore
        self,
        namespace: str,
        name: str,
        old: kopf.Body,
        logger: logging.Logger,
        patch: kopf.Patch,
        status: kopf.Status,
        **kwargs: Any,
    ):
        async with ApiClient() as api_client:
            core = CoreV1Api(api_client)
            await restart_cluster(core, namespace, name, old, logger, patch,
                                  status)

        await self.send_notifications(logger)
Example #11
async def create_sql_exporter_config(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        await call_kubeapi(
            core.create_namespaced_config_map,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=get_sql_exporter_config(owner_references, name, labels),
        )
async def test_absent_logs(faker, namespace, caplog, api_client):
    caplog.set_level(logging.DEBUG, logger=__name__)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    ns = namespace.metadata.name
    await call_kubeapi(
        core.delete_namespaced_secret,
        logger,
        continue_on_absence=True,
        namespace=ns,
        name=name,
        body=V1DeleteOptions(),
    )
    assert (
        f"Failed deleting '{ns}/{name}' because it doesn't exist. Continuing."
        in caplog.messages
    )
async def resolve_secret_key_ref(namespace: str,
                                 secret_key_ref: SecretKeyRef,
                                 core: Optional[CoreV1Api] = None) -> str:
    """
    Look up the secret value defined by ``secret_key_ref`` in ``namespace``.

    :param namespace: The namespace where to look up a secret and its value.
    :param secret_key_ref: A ``secretKeyRef`` containing the secret name and
        the key within it that holds the desired value.
    :param core: An instance of the Kubernetes Core V1 API.
    """
    core = core or CoreV1Api()
    secret_name = secret_key_ref["name"]
    key = secret_key_ref["key"]
    secret = await core.read_namespaced_secret(namespace=namespace,
                                               name=secret_name)
    return b64decode(secret.data[key])
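
A hypothetical usage of ``resolve_secret_key_ref``: the namespace, secret name, and key below are placeholders, and the kubeconfig loading is an assumption about how a standalone script would authenticate.

import asyncio

from kubernetes_asyncio import config
from kubernetes_asyncio.client import ApiClient, CoreV1Api


async def main() -> None:
    await config.load_kube_config()
    async with ApiClient() as api_client:
        # Read the "password" key of the secret "user-mycluster" in the
        # namespace "my-namespace" (all names are made up for illustration).
        password = await resolve_secret_key_ref(
            "my-namespace",
            {"name": "user-mycluster", "key": "password"},
            core=CoreV1Api(api_client),
        )
        print(password)


asyncio.run(main())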
Example #14
async def secret_update(
    namespace: str,
    name: str,
    diff: kopf.Diff,
    logger: logging.Logger,
    **kwargs,
):
    async with ApiClient() as api_client:
        coapi = CustomObjectsApi(api_client)
        core = CoreV1Api(api_client)

        for operation, field_path, old_value, new_value in diff:
            custom_objects = await coapi.list_namespaced_custom_object(
                namespace=namespace,
                group=API_GROUP,
                version="v1",
                plural=RESOURCE_CRATEDB,
            )

            for crate_custom_object in custom_objects["items"]:
                host = await get_host(
                    core, namespace, crate_custom_object["metadata"]["name"]
                )

                for user_spec in crate_custom_object["spec"]["users"]:
                    expected_field_path = (
                        "data",
                        user_spec["password"]["secretKeyRef"]["key"],
                    )
                    if (
                        user_spec["password"]["secretKeyRef"]["name"] == name
                        and field_path == expected_field_path
                    ):
                        kopf.register(
                            fn=subhandler_partial(
                                update_user_password,
                                host,
                                user_spec["name"],
                                old_value,
                                new_value,
                                logger,
                            ),
                            id=f"update-{crate_custom_object['metadata']['name']}-{user_spec['name']}",  # noqa
                            timeout=config.BOOTSTRAP_TIMEOUT,
                        )
async def test_success(faker, namespace, api_client):
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    password = faker.password(length=12)
    await call_kubeapi(
        core.create_namespaced_secret,
        logger,
        namespace=namespace.metadata.name,
        body=V1Secret(
            data={"password": b64encode(password)},
            metadata=V1ObjectMeta(name=name),
            type="Opaque",
        ),
    )
    secret = await core.read_namespaced_secret(
        name=name, namespace=namespace.metadata.name
    )
    assert b64decode(secret.data["password"]) == password
Example #16
async def create_system_user(
    owner_references: Optional[List[V1OwnerReference]],
    namespace: str,
    name: str,
    labels: LabelType,
    logger: logging.Logger,
) -> None:
    """
    The *CrateDB Operator* will need to perform operations on the CrateDB
    cluster. For that, it will use a ``system`` user whose credentials are
    created here.
    """
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        await call_kubeapi(
            core.create_namespaced_secret,
            logger,
            continue_on_conflict=True,
            namespace=namespace,
            body=get_system_user_secret(owner_references, name, labels),
        )
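
As a rough illustration of what a factory like ``get_system_user_secret`` might return: the ``user-system-<name>`` secret name is taken from the tests in this listing, while the password generation below merely stands in for the ``crate.operator.create.gen_password`` helper that those tests mock (its signature is not shown here).

import secrets
from base64 import b64encode
from typing import Dict, List, Optional

from kubernetes_asyncio.client import V1ObjectMeta, V1OwnerReference, V1Secret


def get_system_user_secret_sketch(
    owner_references: Optional[List[V1OwnerReference]],
    name: str,
    labels: Dict[str, str],
) -> V1Secret:
    # Stand-in for gen_password(); the real helper is mocked in the tests.
    password = secrets.token_urlsafe(32)
    return V1Secret(
        data={"password": b64encode(password.encode()).decode()},
        metadata=V1ObjectMeta(
            name=f"user-system-{name}",
            labels=labels,
            owner_references=owner_references,
        ),
        type="Opaque",
    )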
Example #17
async def bootstrap_cluster(
    namespace: str,
    name: str,
    master_node_pod: str,
    license: Optional[SecretKeyRefContainer],
    has_ssl: bool,
    users: Optional[List[Dict[str, Any]]],
    logger: logging.Logger,
):
    """
    Bootstrap an entire cluster, including license, system user, and additional
    users.

    :param namespace: The Kubernetes namespace for the CrateDB cluster.
    :param name: The name for the ``CrateDB`` custom resource. Used to lookup
        the password for the system user created during deployment.
    :param master_node_pod: The pod name of one of the eligible master nodes in
        the cluster. Used to ``exec`` into.
    :param license: An optional ``secretKeyRef`` to the Kubernetes secret that
        holds the CrateDB license key.
    :param has_ssl: When ``True``, ``crash`` will establish a connection to
        the CrateDB cluster from inside the ``crate`` container using SSL/TLS.
        This must match how the cluster is configured, otherwise ``crash``
        won't be able to connect, since non-encrypted connections are forbidden
        when SSL/TLS is enabled, and encrypted connections aren't possible when
        no SSL/TLS is configured.
    :param users: An optional list of user definitions containing the username
        and the secret key reference to their password.
    """
    # We first need to set the license, in case the CrateDB cluster
    # contains more nodes than the free license allows.
    async with ApiClient() as api_client:
        core = CoreV1Api(api_client)
        if license:
            await bootstrap_license(core, namespace, master_node_pod, has_ssl,
                                    license, logger)
        await bootstrap_system_user(core, namespace, name, master_node_pod,
                                    has_ssl, logger)
        if users:
            await bootstrap_users(core, namespace, name, users)
    async def test_create(self, faker, namespace):
        core = CoreV1Api()
        name = faker.domain_word()
        s_data, s_discovery = await asyncio.gather(*create_services(
            core,
            None,
            namespace.metadata.name,
            name,
            {},
            1,
            2,
            3,
            faker.domain_name(),
            logging.getLogger(__name__),
        ))
        await assert_wait_for(
            True,
            self.do_services_exist,
            core,
            namespace.metadata.name,
            {f"crate-{name}", f"crate-discovery-{name}"},
        )
    async def test_create(self, faker, namespace):
        core = CoreV1Api()
        name = faker.domain_word()
        password = faker.password(length=12)
        with mock.patch(
            "crate.operator.create.gen_password", return_value=password
        ):
            secret = await create_system_user(
                core,
                None,
                namespace.metadata.name,
                name,
                {},
                logging.getLogger(__name__),
            )
        await assert_wait_for(
            True,
            self.does_secret_exist,
            core,
            namespace.metadata.name,
            f"user-system-{name}",
        )
        assert b64decode(secret.data["password"]) == password
Example #20
async def update_cratedb_resource(
    namespace: str,
    name: str,
    spec: kopf.Spec,
    **kwargs,
):
    if "users" in spec:
        async with ApiClient() as api_client:
            for user_spec in spec["users"]:
                core = CoreV1Api(api_client)

                secret_name = user_spec["password"]["secretKeyRef"]["name"]
                secret = await core.read_namespaced_secret(
                    namespace=namespace, name=secret_name
                )
                if (
                    secret.metadata.labels is None
                    or LABEL_USER_PASSWORD not in secret.metadata.labels
                ):
                    await ensure_user_password_label(
                        core, namespace, user_spec["password"]["secretKeyRef"]["name"]
                    )
Example #21
    async def _ensure_no_snapshots_in_progress(self, namespace, name, logger):
        async with ApiClient() as api_client:
            core = CoreV1Api(api_client)

            host = await get_host(core, namespace, name)
            password = await get_system_user_password(core, namespace, name)
            conn_factory = connection_factory(host, password)

            snapshots_in_progress, statement = await are_snapshots_in_progress(
                conn_factory, logger)
            if snapshots_in_progress:
                # Raising a TemporaryError will clear any registered subhandlers, so we
                # execute this one directly instead to make sure it runs.
                # The same guarantees about it being executed only once still stand.
                await kopf.execute(
                    fns={
                        "notify_backup_running":
                        subhandler_partial(self._notify_backup_running, logger)
                    })
                raise kopf.TemporaryError(
                    "A snapshot is currently in progress, "
                    f"waiting for it to finish: {statement}",
                    delay=30,
                )
async def restart_cluster(namespace: str, name: str, total_nodes: int,
                          logger: logging.Logger) -> None:
    """
    Perform a rolling restart of the CrateDB cluster ``name`` in ``namespace``.

    One node at a time, this function will terminate first the master nodes and
    then the data nodes in the cluster. After triggering a pod's termination,
    the operator will wait for that pod to be terminated and gone. It will then
    wait for the cluster to have the desired number of nodes again and for the
    cluster to be in a ``GREEN`` state.

    :param namespace: The Kubernetes namespace where to look up the CrateDB
        cluster.
    :param name: The CrateDB custom resource name defining the CrateDB cluster.
    :param total_nodes: The total number of nodes that the cluster should
        consist of, per the CrateDB cluster spec.
    """
    coapi = CustomObjectsApi()
    core = CoreV1Api()

    cluster = await coapi.get_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace,
        name=name,
    )
    password = await get_system_user_password(namespace, name, core)
    host = await get_host(core, namespace, name)
    conn_factory = connection_factory(host, password)

    if "master" in cluster["spec"]["nodes"]:
        await restart_statefulset(core, conn_factory, namespace, name,
                                  "master", total_nodes, logger)
    for node_spec in cluster["spec"]["nodes"]["data"]:
        await restart_statefulset(core, conn_factory, namespace, name,
                                  node_spec["name"], total_nodes, logger)
Example #23
async def test_bootstrap_license(
    bootstrap_system_user: mock.AsyncMock,
    bootstrap_license_mock: mock.AsyncMock,
    faker,
    namespace,
    cleanup_handler,
    kopf_runner,
    api_client,
):
    coapi = CustomObjectsApi(api_client)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    license = base64.b64encode(faker.binary(64)).decode()

    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{name}"), )
    await core.create_namespaced_secret(
        namespace=namespace.metadata.name,
        body=V1Secret(
            data={"license": b64encode(license)},
            metadata=V1ObjectMeta(name=f"license-{name}"),
            type="Opaque",
        ),
    )
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body={
            "apiVersion": "cloud.crate.io/v1",
            "kind": "CrateDB",
            "metadata": {
                "name": name
            },
            "spec": {
                "cluster": {
                    "imageRegistry": "crate",
                    "license": {
                        "secretKeyRef": {
                            "key": "license",
                            "name": f"license-{name}"
                        },
                    },
                    "name": "my-crate-cluster",
                    "version": CRATE_VERSION,
                },
                "nodes": {
                    "data": [{
                        "name": "data",
                        "replicas": 1,
                        "resources": {
                            "cpus": 0.5,
                            "memory": "1Gi",
                            "heapRatio": 0.25,
                            "disk": {
                                "storageClass": "default",
                                "size": "16GiB",
                                "count": 1,
                            },
                        },
                    }]
                },
            },
        },
    )
    await assert_wait_for(
        True,
        was_license_set,
        bootstrap_license_mock,
        mock.ANY,
        namespace.metadata.name,
        f"crate-data-data-{name}-0",
        False,
        {"secretKeyRef": {
            "key": "license",
            "name": f"license-{name}"
        }},
        timeout=DEFAULT_TIMEOUT * 3,
    )
Example #24
async def start_cluster(
    name: str,
    namespace: V1Namespace,
    cleanup_handler,
    core: CoreV1Api,
    coapi: CustomObjectsApi,
    hot_nodes: int = 0,
    crate_version: str = CRATE_VERSION,
) -> Tuple[str, str]:
    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{name}"))
    body = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {
            "name": name
        },
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": crate_version,
            },
            "nodes": {
                "data": [
                    {
                        "name": "hot",
                        "replicas": hot_nodes,
                        "resources": {
                            "cpus": 0.5,
                            "memory": "1Gi",
                            "heapRatio": 0.25,
                            "disk": {
                                "storageClass": "default",
                                "size": "16GiB",
                                "count": 1,
                            },
                        },
                    },
                ]
            },
        },
    }
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body=body,
    )

    host = await asyncio.wait_for(
        get_public_host(core, namespace.metadata.name, name),
        # It takes a while to retrieve an external IP on AKS.
        timeout=DEFAULT_TIMEOUT * 5,
    )
    password = await get_system_user_password(core, namespace.metadata.name,
                                              name)

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        hot_nodes,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=DEFAULT_TIMEOUT * 5,
    )

    return host, password
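
A hypothetical test built on ``start_cluster``, assuming the same fixtures used throughout this listing (``faker``, ``namespace``, ``cleanup_handler``, ``api_client``); it only shows how the returned ``host`` and ``password`` would typically be consumed.

from kubernetes_asyncio.client import CoreV1Api, CustomObjectsApi


async def test_start_cluster_sketch(faker, namespace, cleanup_handler, api_client):
    coapi = CustomObjectsApi(api_client)
    core = CoreV1Api(api_client)
    name = faker.domain_word()

    host, password = await start_cluster(
        name, namespace, cleanup_handler, core, coapi, hot_nodes=1
    )

    # The cluster is reachable and healthy at this point; the credentials can
    # be handed to connection_factory() for direct SQL access.
    conn_factory = connection_factory(host, password)
    assert await is_cluster_healthy(conn_factory, 1)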
Example #25
async def test_bootstrap_users(
    bootstrap_license_mock: mock.AsyncMock,
    faker,
    namespace,
    cleanup_handler,
    kopf_runner,
):
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()
    password1 = faker.password(length=40)
    password2 = faker.password(length=30)
    username1 = faker.user_name()
    username2 = faker.user_name()

    cleanup_handler.append(
        core.delete_persistent_volume(name=f"temp-pv-{namespace.metadata.name}-{name}")
    )
    await asyncio.gather(
        core.create_namespaced_secret(
            namespace=namespace.metadata.name,
            body=V1Secret(
                data={"password": b64encode(password1)},
                metadata=V1ObjectMeta(name=f"user-{name}-1"),
                type="Opaque",
            ),
        ),
        core.create_namespaced_secret(
            namespace=namespace.metadata.name,
            body=V1Secret(
                data={"password": b64encode(password2)},
                metadata=V1ObjectMeta(name=f"user-{name}-2"),
                type="Opaque",
            ),
        ),
    )

    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body={
            "apiVersion": "cloud.crate.io/v1",
            "kind": "CrateDB",
            "metadata": {"name": name},
            "spec": {
                "cluster": {
                    "imageRegistry": "crate",
                    "name": "my-crate-cluster",
                    "version": "4.1.5",
                },
                "nodes": {
                    "data": [
                        {
                            "name": "data",
                            "replicas": 1,
                            "resources": {
                                "cpus": 0.5,
                                "memory": "1Gi",
                                "heapRatio": 0.25,
                                "disk": {
                                    "storageClass": "default",
                                    "size": "16GiB",
                                    "count": 1,
                                },
                            },
                        }
                    ]
                },
                "users": [
                    {
                        "name": username1,
                        "password": {
                            "secretKeyRef": {
                                "key": "password",
                                "name": f"user-{name}-1",
                            }
                        },
                    },
                    {
                        "name": username2,
                        "password": {
                            "secretKeyRef": {
                                "key": "password",
                                "name": f"user-{name}-2",
                            }
                        },
                    },
                ],
            },
        },
    )

    host = await asyncio.wait_for(
        get_public_host(core, namespace.metadata.name, name),
        timeout=BACKOFF_TIME * 5,  # It takes a while to retrieve an external IP on AKS.
    )

    password_system = await get_system_user_password(
        namespace.metadata.name, name, core
    )
    await assert_wait_for(
        True,
        does_user_exist,
        host,
        password_system,
        SYSTEM_USERNAME,
        timeout=BACKOFF_TIME * 5,
    )

    await assert_wait_for(
        True, does_user_exist, host, password1, username1, timeout=BACKOFF_TIME * 3,
    )

    await assert_wait_for(
        True, does_user_exist, host, password2, username2, timeout=BACKOFF_TIME * 3,
    )
Example #26
async def test_restart_cluster(faker, namespace, cleanup_handler, cratedb_crd,
                               kopf_runner):
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()

    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{name}"))
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body={
            "apiVersion": "cloud.crate.io/v1",
            "kind": "CrateDB",
            "metadata": {
                "name": name
            },
            "spec": {
                "cluster": {
                    "imageRegistry": "crate",
                    "name": "my-crate-cluster",
                    "version": "4.1.5",
                },
                "nodes": {
                    "data": [
                        {
                            "name": "hot",
                            "replicas": 1,
                            "resources": {
                                "cpus": 0.5,
                                "memory": "1Gi",
                                "heapRatio": 0.25,
                                "disk": {
                                    "storageClass": "default",
                                    "size": "16GiB",
                                    "count": 1,
                                },
                            },
                        },
                        {
                            "name": "cold",
                            "replicas": 2,
                            "resources": {
                                "cpus": 0.5,
                                "memory": "1Gi",
                                "heapRatio": 0.25,
                                "disk": {
                                    "storageClass": "default",
                                    "size": "16GiB",
                                    "count": 1,
                                },
                            },
                        },
                    ],
                },
            },
        },
    )

    host = await asyncio.wait_for(
        get_public_host(core, namespace.metadata.name, name),
        # It takes a while to retrieve an external IP on AKS.
        timeout=BACKOFF_TIME * 5,
    )

    password = await get_system_user_password(namespace.metadata.name, name,
                                              core)

    await assert_wait_for(
        True,
        do_pods_exist,
        core,
        namespace.metadata.name,
        {
            f"crate-data-hot-{name}-0",
            f"crate-data-cold-{name}-0",
            f"crate-data-cold-{name}-1",
        },
    )

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )

    pods = await core.list_namespaced_pod(namespace=namespace.metadata.name)
    original_pods = {p.metadata.uid for p in pods.items}

    await asyncio.wait_for(
        restart_cluster(namespace.metadata.name, name, 3,
                        logging.getLogger(__name__)),
        BACKOFF_TIME * 15,
    )

    pods = await core.list_namespaced_pod(namespace=namespace.metadata.name)
    new_pods = {p.metadata.uid for p in pods.items}

    assert original_pods.intersection(new_pods) == set()
async def test_scale_cluster(
    repl_master_from,
    repl_master_to,
    repl_hot_from,
    repl_hot_to,
    repl_cold_from,
    repl_cold_to,
    faker,
    namespace,
    cleanup_handler,
    cratedb_crd,
    kopf_runner,
):
    coapi = CustomObjectsApi()
    core = CoreV1Api()
    name = faker.domain_word()

    # Clean up persistent volume after the test
    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{name}"))
    body = {
        "apiVersion": "cloud.crate.io/v1",
        "kind": "CrateDB",
        "metadata": {
            "name": name
        },
        "spec": {
            "cluster": {
                "imageRegistry": "crate",
                "name": "my-crate-cluster",
                "version": "4.1.5",
            },
            "nodes": {
                "data": []
            },
        },
    }
    if repl_master_from:
        body["spec"]["nodes"]["master"] = {
            "replicas": repl_master_from,
            "resources": {
                "cpus": 0.5,
                "memory": "1Gi",
                "heapRatio": 0.25,
                "disk": {
                    "storageClass": "default",
                    "size": "16GiB",
                    "count": 1
                },
            },
        }
    body["spec"]["nodes"]["data"].append(
        {
            "name": "hot",
            "replicas": repl_hot_from,
            "resources": {
                "cpus": 0.5,
                "memory": "1Gi",
                "heapRatio": 0.25,
                "disk": {
                    "storageClass": "default",
                    "size": "16GiB",
                    "count": 1
                },
            },
        }, )
    if repl_cold_from:
        body["spec"]["nodes"]["data"].append(
            {
                "name": "cold",
                "replicas": repl_cold_from,
                "resources": {
                    "cpus": 0.5,
                    "memory": "1Gi",
                    "heapRatio": 0.25,
                    "disk": {
                        "storageClass": "default",
                        "size": "16GiB",
                        "count": 1
                    },
                },
            }, )
    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body=body,
    )

    host = await asyncio.wait_for(
        get_public_host(core, namespace.metadata.name, name),
        # It takes a while to retrieve an external IP on AKS.
        timeout=BACKOFF_TIME * 5,
    )
    password = await get_system_user_password(namespace.metadata.name, name,
                                              core)

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        repl_master_from + repl_hot_from + repl_cold_from,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )

    patch_body = []
    if repl_master_from != repl_master_to:
        patch_body.append({
            "op": "replace",
            "path": "/spec/nodes/master/replicas",
            "value": repl_master_to,
        })
    if repl_hot_from != repl_hot_to:
        patch_body.append({
            "op": "replace",
            "path": "/spec/nodes/data/0/replicas",
            "value": repl_hot_to,
        })
    if repl_cold_from != repl_cold_to:
        patch_body.append({
            "op": "replace",
            "path": "/spec/nodes/data/1/replicas",
            "value": repl_cold_to,
        })
    await coapi.patch_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        name=name,
        body=patch_body,
    )

    await assert_wait_for(
        True,
        is_cluster_healthy,
        connection_factory(host, password),
        repl_master_to + repl_hot_to + repl_cold_to,
        err_msg="Cluster wasn't healthy after 5 minutes.",
        timeout=BACKOFF_TIME * 5,
    )
async def test_update_cluster_password(faker, namespace, cleanup_handler,
                                       kopf_runner, api_client):
    coapi = CustomObjectsApi(api_client)
    core = CoreV1Api(api_client)
    name = faker.domain_word()
    password = faker.password(length=40)
    new_password = faker.password(length=40)
    username = faker.user_name()

    cleanup_handler.append(
        core.delete_persistent_volume(
            name=f"temp-pv-{namespace.metadata.name}-{name}"))
    await asyncio.gather(
        core.create_namespaced_secret(
            namespace=namespace.metadata.name,
            body=V1Secret(
                data={"password": b64encode(password)},
                metadata=V1ObjectMeta(name=f"user-{name}",
                                      labels={LABEL_USER_PASSWORD: "******"}),
                type="Opaque",
            ),
        ), )

    await coapi.create_namespaced_custom_object(
        group=API_GROUP,
        version="v1",
        plural=RESOURCE_CRATEDB,
        namespace=namespace.metadata.name,
        body={
            "apiVersion": "cloud.crate.io/v1",
            "kind": "CrateDB",
            "metadata": {
                "name": name
            },
            "spec": {
                "cluster": {
                    "imageRegistry": "crate",
                    "name": "my-crate-cluster",
                    "version": CRATE_VERSION,
                },
                "nodes": {
                    "data": [{
                        "name": "data",
                        "replicas": 1,
                        "resources": {
                            "cpus": 0.5,
                            "memory": "1Gi",
                            "heapRatio": 0.25,
                            "disk": {
                                "storageClass": "default",
                                "size": "16GiB",
                                "count": 1,
                            },
                        },
                    }]
                },
                "users": [
                    {
                        "name": username,
                        "password": {
                            "secretKeyRef": {
                                "key": "password",
                                "name": f"user-{name}",
                            }
                        },
                    },
                ],
            },
        },
    )

    host = await asyncio.wait_for(
        get_public_host(core, namespace.metadata.name, name),
        # It takes a while to retrieve an external IP on AKS.
        timeout=DEFAULT_TIMEOUT * 5,
    )

    await core.patch_namespaced_secret(
        namespace=namespace.metadata.name,
        name=f"user-{name}",
        body=V1Secret(data={"password": b64encode(new_password)}, ),
    )

    await assert_wait_for(
        True,
        is_password_set,
        host,
        new_password,
        username,
        timeout=DEFAULT_TIMEOUT * 5,
    )
Example #29
async def cluster_create(namespace: str, meta: kopf.Meta, spec: kopf.Spec,
                         logger: logging.Logger, **kwargs):
    name = meta["name"]
    base_labels = {
        LABEL_MANAGED_BY: "crate-operator",
        LABEL_NAME: name,
        LABEL_PART_OF: "cratedb",
    }
    cratedb_labels = base_labels.copy()
    cratedb_labels[LABEL_COMPONENT] = "cratedb"
    cratedb_labels.update(meta.get("labels", {}))

    apps = AppsV1Api()
    batchv1_beta1 = BatchV1beta1Api()
    core = CoreV1Api()

    owner_references = [
        V1OwnerReference(
            api_version=f"{API_GROUP}/v1",
            block_owner_deletion=True,
            controller=True,
            kind="CrateDB",
            name=name,
            uid=meta["uid"],
        )
    ]

    image_pull_secrets = ([
        V1LocalObjectReference(name=secret)
        for secret in config.IMAGE_PULL_SECRETS
    ] if config.IMAGE_PULL_SECRETS else None)

    ports_spec = spec.get("ports", {})
    http_port = ports_spec.get("http", Port.HTTP.value)
    jmx_port = ports_spec.get("jmx", Port.JMX.value)
    postgres_port = ports_spec.get("postgres", Port.POSTGRES.value)
    prometheus_port = ports_spec.get("prometheus", Port.PROMETHEUS.value)
    transport_port = ports_spec.get("transport", Port.TRANSPORT.value)

    master_nodes = get_master_nodes_names(spec["nodes"])
    total_nodes_count = get_total_nodes_count(spec["nodes"])
    crate_image = spec["cluster"]["imageRegistry"] + ":" + spec["cluster"][
        "version"]
    has_master_nodes = "master" in spec["nodes"]
    # The first StatefulSet we create references a set of master nodes. These
    # can either be explicit CrateDB master nodes, or implicit ones, which
    # would be the first set of nodes from the data nodes list.
    #
    # After the first StatefulSet has been created, we set `treat_as_master`
    # to `False` to indicate that all remaining StatefulSets are neither
    # explicit nor implicit master nodes.
    treat_as_master = True
    sts = []
    cluster_name = spec["cluster"]["name"]
    if has_master_nodes:
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                False,
                cluster_name,
                "master",
                "master-",
                spec["nodes"]["master"],
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False
    for node_spec in spec["nodes"]["data"]:
        node_name = node_spec["name"]
        sts.append(
            create_statefulset(
                apps,
                owner_references,
                namespace,
                name,
                cratedb_labels,
                treat_as_master,
                True,
                cluster_name,
                node_name,
                f"data-{node_name}-",
                node_spec,
                master_nodes,
                total_nodes_count,
                http_port,
                jmx_port,
                postgres_port,
                prometheus_port,
                transport_port,
                crate_image,
                spec["cluster"].get("ssl"),
                spec["cluster"].get("settings"),
                image_pull_secrets,
                logger,
            ))
        treat_as_master = False

    await asyncio.gather(
        create_sql_exporter_config(core, owner_references, namespace, name,
                                   cratedb_labels, logger),
        *create_debug_volume(core, owner_references, namespace, name,
                             cratedb_labels, logger),
        create_system_user(core, owner_references, namespace, name,
                           cratedb_labels, logger),
        *sts,
        *create_services(
            core,
            owner_references,
            namespace,
            name,
            cratedb_labels,
            http_port,
            postgres_port,
            transport_port,
            spec.get("cluster", {}).get("externalDNS"),
            logger,
        ),
    )

    if has_master_nodes:
        master_node_pod = f"crate-master-{name}-0"
    else:
        node_name = spec["nodes"]["data"][0]["name"]
        master_node_pod = f"crate-data-{node_name}-{name}-0"

    await with_timeout(
        bootstrap_cluster(
            core,
            namespace,
            name,
            master_node_pod,
            spec["cluster"].get("license"),
            "ssl" in spec["cluster"],
            spec.get("users"),
            logger,
        ),
        config.BOOTSTRAP_TIMEOUT,
        (f"Failed to bootstrap cluster {namespace}/{name} after "
         f"{config.BOOTSTRAP_TIMEOUT} seconds."),
    )

    if "backups" in spec:
        backup_metrics_labels = base_labels.copy()
        backup_metrics_labels[LABEL_COMPONENT] = "backup"
        backup_metrics_labels.update(meta.get("labels", {}))
        await asyncio.gather(*create_backups(
            apps,
            batchv1_beta1,
            owner_references,
            namespace,
            name,
            backup_metrics_labels,
            http_port,
            prometheus_port,
            spec["backups"],
            image_pull_secrets,
            "ssl" in spec["cluster"],
            logger,
        ))
Example #30
    async def handle(  # type: ignore
        self,
        namespace: str,
        name: str,
        spec: kopf.Spec,
        old: kopf.Body,
        diff: kopf.Diff,
        logger: logging.Logger,
        **kwargs: Any,
    ):
        scale_master_diff_item: Optional[kopf.DiffItem] = None
        scale_data_diff_items: Optional[List[kopf.DiffItem]] = None

        for operation, field_path, old_value, new_value in diff:
            if field_path == ("spec", "nodes", "master", "replicas"):
                scale_master_diff_item = kopf.DiffItem(
                    operation, field_path, old_value, new_value
                )
            elif field_path == ("spec", "nodes", "data"):
                # TODO: check for data node order, added or removed types, ...
                if len(old_value) != len(new_value):
                    raise kopf.PermanentError(
                        "Adding and removing node specs is not supported at this time."
                    )
                scale_data_diff_items = []
                for node_spec_idx in range(len(old_value)):
                    old_spec = old_value[node_spec_idx]
                    new_spec = new_value[node_spec_idx]
                    inner_diff = calc_diff(old_spec, new_spec)
                    for (
                        inner_operation,
                        inner_field_path,
                        inner_old_value,
                        inner_new_value,
                    ) in inner_diff:
                        if inner_field_path == ("replicas",):
                            scale_data_diff_items.append(
                                kopf.DiffItem(
                                    inner_operation,
                                    (str(node_spec_idx),) + inner_field_path,
                                    inner_old_value,
                                    inner_new_value,
                                )
                            )
                        else:
                            logger.info(
                                "Ignoring operation %s on field %s",
                                operation,
                                field_path + (str(node_spec_idx),) + inner_field_path,
                            )
            else:
                logger.info("Ignoring operation %s on field %s", operation, field_path)

        async with ApiClient() as api_client:
            apps = AppsV1Api(api_client)
            core = CoreV1Api(api_client)

            await scale_cluster(
                apps,
                core,
                namespace,
                name,
                old,
                scale_master_diff_item,
                (kopf.Diff(scale_data_diff_items) if scale_data_diff_items else None),
                logger,
            )

        self.schedule_notification(
            WebhookEvent.SCALE,
            WebhookScalePayload(
                old_data_replicas=[
                    WebhookScaleNodePayload(
                        name=item["name"], replicas=item["replicas"]
                    )
                    for item in old["spec"]["nodes"]["data"]
                ],
                new_data_replicas=[
                    WebhookScaleNodePayload(
                        name=item["name"], replicas=item["replicas"]
                    )
                    for item in spec["nodes"]["data"]
                ],
                old_master_replicas=old["spec"]["nodes"]
                .get("master", {})
                .get("replicas"),
                new_master_replicas=spec["nodes"].get("master", {}).get("replicas"),
            ),
            WebhookStatus.SUCCESS,
        )
        await self.send_notifications(logger)