Ejemplo n.º 1
0
def test_payload_serialization_upgrade():
    """
    A failed-upgrade ``WebhookPayload`` must survive a JSON round trip and
    come back as plain dicts and strings.
    """
    upgrade_details = WebhookUpgradePayload(
        old_registry="a",
        new_registry="b",
        old_version="c",
        new_version="d",
    )
    payload = WebhookPayload(
        event=WebhookEvent.UPGRADE,
        status=WebhookStatus.FAILURE,
        namespace="some-namespace",
        cluster="some-cluster",
        scale_data=None,
        upgrade_data=upgrade_details,
    )

    # Serialize and immediately parse again so the comparison is made against
    # what a receiver of the JSON document would see.
    round_tripped = json.loads(json.dumps(payload))

    expected_upgrade = {
        "old_registry": "a",
        "new_registry": "b",
        "old_version": "c",
        "new_version": "d",
    }
    expected = {
        "event": "upgrade",
        "status": "failure",
        "namespace": "some-namespace",
        "cluster": "some-cluster",
        "scale_data": None,
        "upgrade_data": expected_upgrade,
    }
    assert round_tripped == expected
Ejemplo n.º 2
0
 async def test_not_configured(self):
     """
     A client on which ``configure()`` was never called must return ``None``
     from ``send_upgrade_notification``.
     """
     unconfigured_client = WebhookClient()
     upgrade_details = WebhookUpgradePayload(
         old_registry="a",
         new_registry="b",
         old_version="c",
         new_version="d",
     )
     log = logging.getLogger(__name__)

     result = await unconfigured_client.send_upgrade_notification(
         WebhookStatus.SUCCESS,
         "my-namespace",
         "my-cluster",
         upgrade_details,
         log,
     )

     assert result is None
Ejemplo n.º 3
0
 async def test_error(self):
     """
     Posting to the test server's ``/error/`` path must hand the HTTP 418
     response back to the caller.
     """
     server = self.server
     endpoint = f"{server.scheme}://{server.host}:{server.port}/error/"

     webhook = WebhookClient()
     webhook.configure(endpoint, "itsme", "secr3t password")

     upgrade_details = WebhookUpgradePayload(
         old_registry="a",
         new_registry="b",
         old_version="c",
         new_version="d",
     )
     log = logging.getLogger(__name__)

     result = await webhook.send_upgrade_notification(
         WebhookStatus.SUCCESS,
         "my-namespace",
         "my-cluster",
         upgrade_details,
         log,
     )

     assert result.status == 418
Ejemplo n.º 4
0
 async def test_send_upgrade_notification(self):
     """
     A configured client must deliver the upgrade notification and receive a
     200 response whose JSON body echoes the credentials and the payload.
     """
     server = self.server
     endpoint = f"{server.scheme}://{server.host}:{server.port}/some/path/"

     webhook = WebhookClient()
     webhook.configure(endpoint, "itsme", "secr3t password")

     upgrade_details = WebhookUpgradePayload(
         old_registry="a",
         new_registry="b",
         old_version="c",
         new_version="d",
     )
     log = logging.getLogger(__name__)

     result = await webhook.send_upgrade_notification(
         WebhookStatus.SUCCESS,
         "my-namespace",
         "my-cluster",
         upgrade_details,
         log,
     )
     assert result.status == 200

     expected_payload = {
         "event": "upgrade",
         "status": "success",
         "namespace": "my-namespace",
         "cluster": "my-cluster",
         "scale_data": None,
         "upgrade_data": {
             "old_registry": "a",
             "new_registry": "b",
             "old_version": "c",
             "new_version": "d",
         },
     }
     # NOTE(review): the expected credentials are the literal "******" rather
     # than the values passed to configure() above - presumably the test
     # server masks them; confirm against the server handler.
     expected_body = {
         "username": "******",
         "password": "******",
         "payload": expected_payload,
     }

     received_body = await result.json()
     assert received_body == expected_body
Ejemplo n.º 5
0
async def cluster_update(
    namespace: str,
    name: str,
    body: kopf.Body,
    spec: kopf.Spec,
    diff: kopf.Diff,
    old: kopf.Body,
    logger: logging.Logger,
    **kwargs,
):
    """
    Implement any updates to a cluster. The handler will sort out the logic of
    what to update, in which order, and when to trigger a restart of a cluster.

    The ``diff`` is scanned once to decide which operations are needed:

    - a change of ``spec.cluster.imageRegistry`` or ``spec.cluster.version``
      triggers an upgrade followed by a cluster restart;
    - a change of ``spec.nodes.master.replicas`` or of a ``replicas`` field
      inside ``spec.nodes.data`` triggers a scaling operation.

    All other changes are logged and ignored. The upgrade (and its restart)
    runs before scaling. After the restart and after scaling, a webhook
    notification with the outcome is sent.

    :param namespace: Namespace of the cluster resource.
    :param name: Name of the cluster resource.
    :param body: The new resource body; new values are read from
        ``body.spec``.
    :param spec: The new resource spec; forwarded to ``scale_cluster``.
    :param diff: Iterable of ``(operation, field_path, old_value,
        new_value)`` items describing what changed.
    :param old: The previous resource body; old values are read from
        ``old["spec"]``.
    :param logger: Logger used for progress and error messages.
    :param kwargs: Any further handler arguments; unused.
    :raises kopf.PermanentError: If the number of entries in
        ``spec.nodes.data`` changed.
    :raises Exception: Any error raised while restarting or scaling is logged,
        reported through a failure notification and then re-raised.
    """
    apps = AppsV1Api()
    do_scale_master = False
    do_scale_data = False
    do_upgrade = False
    requires_restart = False
    scale_master_diff_item: Optional[kopf.DiffItem] = None
    scale_data_diff_items: Optional[List[kopf.DiffItem]] = None
    # Only populated when an upgrade is detected. Initialised here so the
    # restart error path can never hit an unbound name.
    webhook_upgrade_payload: Optional[WebhookUpgradePayload] = None

    for operation, field_path, old_value, new_value in diff:
        if field_path in {
            ("spec", "cluster", "imageRegistry"),
            ("spec", "cluster", "version"),
        }:
            do_upgrade = True
        elif field_path == ("spec", "nodes", "master", "replicas"):
            do_scale_master = True
            scale_master_diff_item = kopf.DiffItem(operation, field_path,
                                                   old_value, new_value)
        elif field_path == ("spec", "nodes", "data"):
            # TODO: check for data node order, added or removed types, ...
            if len(old_value) != len(new_value):
                raise kopf.PermanentError(
                    "Cannot handle changes to the number of node specs.")
            scale_data_diff_items = []
            # Both lists have equal length (checked above), so pairing them
            # positionally with zip() visits every node spec exactly once.
            for node_spec_idx, (old_spec, new_spec) in enumerate(
                    zip(old_value, new_value)):
                for (
                        inner_operation,
                        inner_field_path,
                        inner_old_value,
                        inner_new_value,
                ) in calc_diff(old_spec, new_spec):
                    if inner_field_path == ("replicas", ):
                        do_scale_data = True
                        scale_data_diff_items.append(
                            kopf.DiffItem(
                                inner_operation,
                                (str(node_spec_idx), ) + inner_field_path,
                                inner_old_value,
                                inner_new_value,
                            ))
                    else:
                        # Report the operation that applies to the nested
                        # field, not the one on the enclosing data list.
                        logger.info(
                            "Ignoring operation %s on field %s",
                            inner_operation,
                            field_path + (str(node_spec_idx), ) +
                            inner_field_path,
                        )
        else:
            logger.info("Ignoring operation %s on field %s", operation,
                        field_path)

    if do_upgrade:
        # Capture old and new registry/version before upgrading so the
        # notification sent after the restart describes this transition.
        webhook_upgrade_payload = WebhookUpgradePayload(
            old_registry=old["spec"]["cluster"]["imageRegistry"],
            new_registry=body.spec["cluster"]["imageRegistry"],
            old_version=old["spec"]["cluster"]["version"],
            new_version=body.spec["cluster"]["version"],
        )
        await upgrade_cluster(namespace, name, body)
        requires_restart = True

    if requires_restart:
        try:
            # We need to derive the desired number of nodes from the old spec,
            # since the new could have a different total number of nodes if a
            # scaling operation is in progress as well.
            expected_nodes = get_total_nodes_count(old["spec"]["nodes"])
            await with_timeout(
                restart_cluster(namespace, name, expected_nodes, logger),
                config.ROLLING_RESTART_TIMEOUT,
                (f"Failed to restart cluster {namespace}/{name} after "
                 f"{config.ROLLING_RESTART_TIMEOUT} seconds."),
            )
        except Exception:
            logger.exception("Failed to restart cluster")
            # Mirror the success path: only report an upgrade outcome when an
            # upgrade was actually performed.
            if do_upgrade:
                await webhook_client.send_upgrade_notification(
                    WebhookStatus.FAILURE, namespace, name,
                    webhook_upgrade_payload, logger)
            raise
        else:
            logger.info("Cluster restarted")
            if do_upgrade:
                await webhook_client.send_upgrade_notification(
                    WebhookStatus.SUCCESS,
                    namespace,
                    name,
                    webhook_upgrade_payload,
                    logger,
                )

    if do_scale_master or do_scale_data:
        old_nodes = old["spec"]["nodes"]
        new_nodes = body.spec["nodes"]
        webhook_scale_payload = WebhookScalePayload(
            old_data_replicas=[
                WebhookScaleNodePayload(name=item["name"],
                                        replicas=item["replicas"])
                for item in old_nodes["data"]
            ],
            new_data_replicas=[
                WebhookScaleNodePayload(name=item["name"],
                                        replicas=item["replicas"])
                for item in new_nodes["data"]
            ],
            # A missing "master" section yields ``None`` for its replicas.
            old_master_replicas=old_nodes.get("master", {}).get("replicas"),
            new_master_replicas=new_nodes.get("master", {}).get("replicas"),
        )
        try:
            await with_timeout(
                scale_cluster(
                    apps,
                    namespace,
                    name,
                    do_scale_data,
                    do_scale_master,
                    get_total_nodes_count(old["spec"]["nodes"]),
                    spec,
                    scale_master_diff_item,
                    # An empty or missing list of data changes is passed on
                    # as ``None`` rather than as an empty diff.
                    kopf.Diff(scale_data_diff_items)
                    if scale_data_diff_items else None,
                    logger,
                ),
                config.SCALING_TIMEOUT,
                (f"Failed to scale cluster {namespace}/{name} after "
                 f"{config.SCALING_TIMEOUT} seconds."),
            )
        except Exception:
            logger.exception("Failed to scale cluster")
            await webhook_client.send_scale_notification(
                WebhookStatus.FAILURE, namespace, name, webhook_scale_payload,
                logger)
            raise
        else:
            logger.info("Cluster scaled")
            await webhook_client.send_scale_notification(
                WebhookStatus.SUCCESS, namespace, name, webhook_scale_payload,
                logger)