Exemple #1
0
def test_master_node_replace() -> None:
    """Replace the ``master-0`` pod and wait for the recovery plan to complete.

    Ideally the replacement lands on a different agent, so the two remaining
    masters have to find the replaced master at its new IP address. That
    requires a reasonably low TTL for Java DNS lookups.
    """
    replace_command = "pod replace master-0"
    sdk_cmd.svc_cli(package_name, service_name, replace_command)

    # Wait for recovery to be seen in progress before waiting for it to finish.
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
Exemple #2
0
def test_master_node_replace():
    """Replace the ``master-0`` pod of the foldered service and wait for recovery.

    Ideally the pod gets placed on a different agent; the remaining two masters
    then have to find the replaced master at its new IP address, which requires
    a reasonably low TTL for Java DNS lookups.
    """
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    sdk_cmd.svc_cli(config.PACKAGE_NAME, service, "pod replace master-0")

    # Recovery must first be observed in progress, then observed as complete.
    sdk_plan.wait_for_in_progress_recovery(service)
    sdk_plan.wait_for_completed_recovery(service)
def test_master_node_replace() -> None:
    """Issue ``pod replace master-0`` and block until the recovery plan has completed.

    The replaced pod will ideally be scheduled on a different agent. The two
    masters left running must then locate it at its new IP address, which
    depends on a reasonably low TTL for Java DNS lookups.
    """
    pod_command = "pod replace master-0"
    sdk_cmd.svc_cli(package_name, service_name, pod_command)

    # First see the recovery start, then see it finish.
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
def test_master_reelection():
    """Kill the current Elasticsearch master process and check a different master is reported."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    old_master = config.get_elasticsearch_master(service_name=service)

    # Kill the Elasticsearch process on the host of the current master.
    master_host = sdk_hosts.system_host(service, old_master)
    shakedown.kill_process_on_host(master_host, "master__.*Elasticsearch")

    sdk_plan.wait_for_in_progress_recovery(service)
    sdk_plan.wait_for_completed_recovery(service)
    config.wait_for_expected_nodes_to_exist(service_name=service)

    # The reported master must be a master node other than the one that was killed.
    elected_master = config.get_elasticsearch_master(service_name=service)
    assert elected_master.startswith("master")
    assert elected_master != old_master
def test_master_reelection():
    """Kill the current Elasticsearch master process and check a different master is reported.

    Finishes by waiting for both the deploy plan and the recovery plan to be complete.
    """
    old_master = config.get_elasticsearch_master(service_name=foldered_name)

    # Kill the Elasticsearch process on the host of the current master.
    master_host = sdk_hosts.system_host(foldered_name, old_master)
    shakedown.kill_process_on_host(master_host, "master__.*Elasticsearch")

    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)

    # The reported master must be a master node other than the one that was killed.
    elected_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert elected_master.startswith("master")
    assert elected_master != old_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Exemple #6
0
def test_master_reelection():
    """Kill the current Elasticsearch master task and check a different master is reported.

    Finishes by waiting for both the deploy plan and the recovery plan to be complete.
    """
    old_master = config.get_elasticsearch_master(service_name=foldered_name)

    # Kill the matching process on the agent that hosts the first task returned
    # for the current master.
    master_task = sdk_tasks.get_service_tasks(foldered_name, old_master)[0]
    sdk_cmd.kill_task_with_pattern(
        "master__.*Elasticsearch", "nobody", agent_host=master_task.host
    )

    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)

    # The reported master must be a master node other than the one that was killed.
    elected_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert elected_master.startswith("master")
    assert elected_master != old_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_master_reelection() -> None:
    """Kill the current Elasticsearch master task and check a different master is reported.

    Finishes by waiting for both the deploy plan and the recovery plan to be complete.
    """
    previous_master = config.get_elasticsearch_master(service_name=service_name)

    # Kill the matching process on the agent that hosts the first task returned
    # for the current master.
    previous_master_task = sdk_tasks.get_service_tasks(service_name, previous_master)[0]
    sdk_cmd.kill_task_with_pattern(
        "master__.*Elasticsearch", "nobody", agent_host=previous_master_task.host
    )

    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
    config.wait_for_expected_nodes_to_exist(service_name=service_name)

    # The reported master must be a master node other than the one that was killed.
    current_master = config.get_elasticsearch_master(service_name=service_name)
    assert current_master.startswith("master")
    assert current_master != previous_master

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
Exemple #8
0
def test_namenodes_acheive_quorum_after_journalnode_replace():
    """
    Check that namenodes recover after a journalnode failure.

    Guards the fix for https://jira.apache.org/jira/browse/HDFS-10659: once a first
    Journal Node has recovered, replacing a second Journal Node pod triggers crash
    looping of both the replaced Journal Node pod and all NameNode pods.
    """
    recovery_timeout_seconds = 5 * 60
    replace_template = "pod replace {}"

    # journal-0 is replaced twice, with journal-1 in between.
    for journal_pod in ("journal-0", "journal-1", "journal-0"):
        sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, replace_template.format(journal_pod))

        # Wait for recovery to start before waiting for it to complete, to avoid
        # timing issues.
        sdk_plan.wait_for_in_progress_recovery(
            service_name=foldered_name, timeout_seconds=recovery_timeout_seconds
        )

        # sdk_plan.wait_for_completed_recovery tracks failed tasks and terminates
        # in case of a crash loop.
        sdk_plan.wait_for_completed_recovery(
            service_name=foldered_name, timeout_seconds=recovery_timeout_seconds
        )
def test_coordinator_node_replace():
    """Replace the ``coordinator-0`` pod and wait for the recovery plan to complete."""
    replace_command = "pod replace coordinator-0"
    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, replace_command)

    # Wait for recovery to be seen in progress before waiting for it to finish.
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_master_node_replace():
    """Replace the ``master-0`` pod and wait for the recovery plan to complete.

    Ideally the pod gets placed on a different agent; the remaining two masters
    then have to find the replaced master at its new IP address, which requires
    a reasonably low TTL for Java DNS lookups.
    """
    replace_command = "pod replace master-0"
    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, replace_command)

    # Wait for recovery to be seen in progress before waiting for it to finish.
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Exemple #11
0
def test_data_node_replace():
    """Replace the ``data-0`` pod of the foldered service and wait for recovery to complete."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    sdk_cmd.svc_cli(config.PACKAGE_NAME, service, "pod replace data-0")

    # Recovery must first be observed in progress, then observed as complete.
    sdk_plan.wait_for_in_progress_recovery(service)
    sdk_plan.wait_for_completed_recovery(service)
def test_coordinator_node_replace() -> None:
    """Replace the ``coordinator-0`` pod and wait for the recovery plan to complete."""
    replace_command = "pod replace coordinator-0"
    sdk_cmd.svc_cli(package_name, service_name, replace_command)

    # Wait for recovery to be seen in progress before waiting for it to finish.
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
Exemple #13
0
def test_data_node_replace():
    """Replace the ``data-0`` pod and wait for the recovery plan to complete."""
    replace_command = "pod replace data-0"
    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, replace_command)

    # Wait for recovery to be seen in progress before waiting for it to finish.
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Exemple #14
0
def test_coordinator_node_replace() -> None:
    """Issue ``pod replace coordinator-0`` and block until the recovery plan has completed."""
    pod_command = "pod replace coordinator-0"
    sdk_cmd.svc_cli(package_name, service_name, pod_command)

    # First see the recovery start, then see it finish.
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
Exemple #15
0
def test_overlay_network():
    """Verify the deploy plan, running tasks, task networks and VIP endpoints of the service.

    Checks, in order:
      * the completed deploy plan consists of the five expected phases, in
        order, with exactly one step each;
      * every name in ``EXPECTED_TASKS`` is among the service's non-completed tasks;
      * tasks with "host" in their name hold port resources, tasks with
        "overlay" in their name do not ("getter" tasks are skipped);
      * ``sdk_networks.check_task_network`` passes for each server task (the
        host tasks are checked with ``expected_network_name=None``);
      * the service lists two endpoints, and the ``overlay-vip`` and
        ``host-vip`` endpoints each expose one address and one DNS entry on
        port 4044.
    """
    deployment_plan = sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    # test that the deployment plan is correct: these phases, in this order,
    # each with a single step
    expected_phase_names = [
        "hello-overlay-deploy",
        "hello-overlay-vip-deploy",
        "hello-host-vip-deploy",
        "hello-host-deploy",
        "getter-deploy",
    ]
    phases = deployment_plan["phases"]
    actual_phase_names = [phase["name"] for phase in phases]
    assert actual_phase_names == expected_phase_names, (
        "Unexpected deploy phases: {}".format(actual_phase_names)
    )
    for phase in phases:
        assert len(phase["steps"]) == 1, (
            "Phase {} should have exactly one step".format(phase["name"])
        )

    # Due to DNS resolution flakiness, some of the deployed tasks can fail. If so,
    # we wait for them to redeploy, but if they don't fail we still want to proceed.
    try:
        sdk_plan.wait_for_in_progress_recovery(config.SERVICE_NAME, timeout_seconds=60)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME, timeout_seconds=60)
    except retrying.RetryError:
        # Deliberately best-effort: leave a trace in the log and carry on.
        log.info("Recovery wait raised RetryError; continuing with the checks")

    # test that the tasks are all up, which tests the overlay DNS
    framework_tasks = list(
        shakedown.get_service_tasks(config.SERVICE_NAME, completed=False)
    )
    framework_task_names = [task["name"] for task in framework_tasks]

    for expected_task in EXPECTED_TASKS:
        assert expected_task in framework_task_names, "Missing {expected}".format(
            expected=expected_task
        )

    for task in framework_tasks:
        name = task["name"]
        if "getter" in name:  # don't check the "getter" tasks because they don't use ports
            continue
        resources = task["resources"]
        if "host" in name:
            assert "ports" in resources, "Task {} should have port resources".format(name)
        if "overlay" in name:
            assert "ports" not in resources, (
                "Task {} should NOT have port resources".format(name)
            )

    sdk_networks.check_task_network("hello-overlay-0-server")
    sdk_networks.check_task_network("hello-overlay-vip-0-server")
    sdk_networks.check_task_network("hello-host-0-server", expected_network_name=None)
    sdk_networks.check_task_network("hello-host-vip-0-server", expected_network_name=None)

    endpoints_result = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME, "endpoints", json=True
    )
    assert len(endpoints_result) == 2, "Wrong number of endpoints got {} should be 2".format(
        len(endpoints_result)
    )

    # Both VIP endpoints get the same checks; only the endpoint name, the task
    # behind it and the expected leading characters of the address differ.
    vip_endpoint_cases = (
        ("overlay", "overlay-vip", "hello-overlay-vip-0-server", "9"),
        ("host", "host-vip", "hello-host-vip-0-server", "10"),
    )
    for label, endpoint_name, task_name, address_prefix in vip_endpoint_cases:
        endpoint = sdk_cmd.svc_cli(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            "endpoints {}".format(endpoint_name),
            json=True,
        )
        assert "address" in endpoint, "{} endpoints missing 'address': {}".format(
            label, endpoint
        )
        addresses = endpoint["address"]
        assert len(addresses) == 1
        assert addresses[0].startswith(address_prefix)
        # The port is whatever follows the last ":" of the address.
        assert addresses[0].split(":")[-1] == "4044"

        assert "dns" in endpoint, "{} endpoints missing 'dns': {}".format(label, endpoint)
        assert len(endpoint["dns"]) == 1
        assert endpoint["dns"][0] == sdk_hosts.autoip_host(
            config.SERVICE_NAME, task_name, 4044
        )
    def restart_zookeeper_node(id: int):
        """Restart the ``zookeeper-<id>`` pod and wait for the recovery plan to complete."""
        restart_command = "pod restart zookeeper-{}".format(id)
        sdk_cmd.svc_cli(ZK_PACKAGE, ZK_SERVICE_NAME, restart_command)

        # Wait for recovery to be seen in progress before waiting for it to finish.
        sdk_plan.wait_for_in_progress_recovery(ZK_SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(ZK_SERVICE_NAME)
Exemple #17
0
def test_overlay_network():
    """Verify the deploy plan, running tasks, task networks and VIP endpoints of the service.

    Checks, in order:
      * the completed deploy plan consists of the five expected phases, in
        order, with exactly one step each;
      * every name in ``EXPECTED_TASKS`` is among the service's non-completed tasks;
      * tasks with "host" in their name hold port resources, tasks with
        "overlay" in their name do not ("getter" tasks are skipped);
      * ``sdk_networks.check_task_network`` passes for each server task (the
        host tasks are checked with ``expected_network_name=None``);
      * the service lists two endpoints, and the ``overlay-vip`` and
        ``host-vip`` endpoints each expose one address and one DNS entry on
        port 4044.
    """
    deployment_plan = sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    # test that the deployment plan is correct: these phases, in this order,
    # each with a single step
    expected_phase_names = [
        "hello-overlay-deploy",
        "hello-overlay-vip-deploy",
        "hello-host-vip-deploy",
        "hello-host-deploy",
        "getter-deploy",
    ]
    phases = deployment_plan["phases"]
    actual_phase_names = [phase["name"] for phase in phases]
    assert actual_phase_names == expected_phase_names, \
        "Unexpected deploy phases: {}".format(actual_phase_names)
    for phase in phases:
        assert len(phase["steps"]) == 1, \
            "Phase {} should have exactly one step".format(phase["name"])

    # Due to DNS resolution flakiness, some of the deployed tasks can fail. If so,
    # we wait for them to redeploy, but if they don't fail we still want to proceed.
    try:
        sdk_plan.wait_for_in_progress_recovery(config.SERVICE_NAME, timeout_seconds=60)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME, timeout_seconds=60)
    except TimeoutExpired:
        # Deliberately best-effort: leave a trace in the log and carry on.
        log.info("Recovery wait raised TimeoutExpired; continuing with the checks")

    # test that the tasks are all up, which tests the overlay DNS
    framework_tasks = list(shakedown.get_service_tasks(config.SERVICE_NAME, completed=False))
    framework_task_names = [task["name"] for task in framework_tasks]

    for expected_task in EXPECTED_TASKS:
        assert expected_task in framework_task_names, "Missing {expected}".format(expected=expected_task)

    for task in framework_tasks:
        name = task["name"]
        if "getter" in name:  # don't check the "getter" tasks because they don't use ports
            continue
        resources = task["resources"]
        if "host" in name:
            assert "ports" in resources, "Task {} should have port resources".format(name)
        if "overlay" in name:
            assert "ports" not in resources, "Task {} should NOT have port resources".format(name)

    sdk_networks.check_task_network("hello-overlay-0-server")
    sdk_networks.check_task_network("hello-overlay-vip-0-server")
    sdk_networks.check_task_network("hello-host-0-server", expected_network_name=None)
    sdk_networks.check_task_network("hello-host-vip-0-server", expected_network_name=None)

    endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'endpoints', json=True)
    assert len(endpoints_result) == 2, "Wrong number of endpoints got {} should be 2".format(len(endpoints_result))

    # Both VIP endpoints get the same checks; only the endpoint name, the task
    # behind it and the expected leading characters of the address differ.
    vip_endpoint_cases = (
        ("overlay", "overlay-vip", "hello-overlay-vip-0-server", "9"),
        ("host", "host-vip", "hello-host-vip-0-server", "10"),
    )
    for label, endpoint_name, task_name, address_prefix in vip_endpoint_cases:
        endpoint = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, config.SERVICE_NAME, "endpoints {}".format(endpoint_name), json=True)
        assert "address" in endpoint, "{} endpoints missing 'address': {}".format(label, endpoint)
        addresses = endpoint["address"]
        assert len(addresses) == 1
        assert addresses[0].startswith(address_prefix)
        # The port is whatever follows the last ":" of the address.
        assert addresses[0].split(":")[-1] == "4044"

        assert "dns" in endpoint, "{} endpoints missing 'dns': {}".format(label, endpoint)
        assert len(endpoint["dns"]) == 1
        assert endpoint["dns"][0] == sdk_hosts.autoip_host(config.SERVICE_NAME, task_name, 4044)