Example 1
def test_integrity_on_data_node_failure():
    """
    Verifies proper data replication among data nodes.
    """
    # An HDFS write will only successfully return when the data replication has taken place
    config.write_data_to_hdfs(config.SERVICE_NAME, config.TEST_FILE_1_NAME)

    sdk_tasks.kill_task_with_pattern("DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-0-node'))
    sdk_tasks.kill_task_with_pattern("DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-1-node'))

    config.read_data_from_hdfs(config.SERVICE_NAME, config.TEST_FILE_1_NAME)

    config.check_healthy(service_name=config.SERVICE_NAME)
Example 2
def test_integrity_on_data_node_failure():
    """
    Verifies proper data replication among data nodes.
    """
    test_filename = get_unique_filename("test_datanode_fail")

    # An HDFS write will only successfully return when the data replication has taken place
    config.write_data_to_hdfs(config.SERVICE_NAME, test_filename)

    sdk_cmd.kill_task_with_pattern("DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-0-node'))
    sdk_cmd.kill_task_with_pattern("DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-1-node'))

    config.read_data_from_hdfs(config.SERVICE_NAME, test_filename)

    config.check_healthy(service_name=config.SERVICE_NAME)
Example 3
def test_integrity_on_data_node_failure():
    write_some_data('data-0-node', TEST_FILE_1_NAME)

    # give the write a chance to succeed and replication to occur
    time.sleep(9)

    tasks.kill_task_with_pattern(
        "DataNode", hosts.system_host(FOLDERED_SERVICE_NAME, 'data-0-node'))
    tasks.kill_task_with_pattern(
        "DataNode", hosts.system_host(FOLDERED_SERVICE_NAME, 'data-1-node'))
    time.sleep(1)  # give DataNode a chance to die

    read_some_data('data-2-node', TEST_FILE_1_NAME)

    check_healthy()
Example 4
def get_metrics(service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from
    """
    host = hosts.system_host(service_name, task_name)
    auth_token, _, _ = shakedown.run_dcos_command(
        'config show core.dcos_acs_token')
    auth_token = auth_token.strip()

    service_containers_cmd = """curl --header "Authorization: token={}"
        -s http://localhost:61001/system/v1/metrics/v0/containers""".format(
        auth_token).replace("\n", "")
    _, output = shakedown.run_command_on_agent(host, service_containers_cmd)
    # We need at least one container whose metrics we can return
    if output == "[]":
        return []

    # Sanitize output as it's a string-represented list i.e. '["bc005e73...","2ef32c62..."]'
    containers = ast.literal_eval(output)
    # Need just one container to probe so just get the first one
    container_id = containers[0]
    metrics_cmd = """curl --header "Authorization: token={}"
        -s http://localhost:61001/system/v1/metrics/v0/containers/{}/app""".format(
        auth_token, container_id).replace("\n", "")
    _, output = shakedown.run_command_on_agent(host, metrics_cmd)

    metrics = json.loads(output)
    return metrics["datapoints"]
Example 5
def test_integrity_on_data_node_failure():
    """
    Verifies proper data replication among data nodes.
    """
    test_filename = "test_datanode_fail"  # must be unique among tests in this suite

    # An HDFS write will only successfully return when the data replication has taken place
    config.write_data_to_hdfs(config.SERVICE_NAME, test_filename)

    sdk_cmd.kill_task_with_pattern(
        "DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-0-node'))
    sdk_cmd.kill_task_with_pattern(
        "DataNode", sdk_hosts.system_host(config.SERVICE_NAME, 'data-1-node'))

    config.read_data_from_hdfs(config.SERVICE_NAME, test_filename)

    config.check_healthy(service_name=config.SERVICE_NAME)
Example 6
def test_losing_and_regaining_index_health(default_populated_index):
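    # Index health starts green, drops to yellow while the data node's Elasticsearch
    # process is dead, then returns to green once it recovers.
    # (foldered_name is assumed to be defined at module level, e.g. via
    # sdk_utils.get_foldered_name(config.SERVICE_NAME) as in the other examples.)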
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "green", service_name=foldered_name)
    shakedown.kill_process_on_host(sdk_hosts.system_host(foldered_name, "data-0-node"), "data__.*Elasticsearch")
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "yellow", service_name=foldered_name)
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "green", service_name=foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example 7
def test_master_reelection():
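    # Kill the Elasticsearch process on the current master node, wait for the
    # recovery plan to run, and verify that a different master node is elected.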
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    initial_master = config.get_elasticsearch_master(service_name=foldered_name)
    shakedown.kill_process_on_host(sdk_hosts.system_host(foldered_name, initial_master), "master__.*Elasticsearch")
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)
    new_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert new_master.startswith("master") and new_master != initial_master
Example 8
def test_master_reelection():
    initial_master = config.get_elasticsearch_master(
        service_name=FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(sdk_hosts.system_host(
        FOLDERED_SERVICE_NAME, initial_master), "master__.*Elasticsearch")
    config.wait_for_expected_nodes_to_exist(service_name=FOLDERED_SERVICE_NAME)
    new_master = config.get_elasticsearch_master(
        service_name=FOLDERED_SERVICE_NAME)
    assert new_master.startswith("master") and new_master != initial_master
Example 9
def test_losing_and_regaining_index_health(default_populated_index):
    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "green", service_name=FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(sdk_hosts.system_host(
        FOLDERED_SERVICE_NAME, "data-0-node"), "data__.*Elasticsearch")
    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "yellow", service_name=FOLDERED_SERVICE_NAME)
    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "green", service_name=FOLDERED_SERVICE_NAME)
Example 10
def test_master_reelection():
    initial_master = get_elasticsearch_master(
        service_name=FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(
        hosts.system_host(FOLDERED_SERVICE_NAME, initial_master),
        "master__.*Elasticsearch")
    wait_for_expected_nodes_to_exist(service_name=FOLDERED_SERVICE_NAME)
    new_master = get_elasticsearch_master(service_name=FOLDERED_SERVICE_NAME)
    assert new_master.startswith("master") and new_master != initial_master
Example 11
def test_kill_name_node():
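    # Kill the NameNode process on name-0-node and verify that only the 'name'
    # tasks are replaced; 'journal' and 'data' tasks keep their task IDs.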
    name_ids = sdk_tasks.get_task_ids(config.FOLDERED_SERVICE_NAME, 'name-0')
    journal_ids = sdk_tasks.get_task_ids(config.FOLDERED_SERVICE_NAME, 'journal')
    data_ids = sdk_tasks.get_task_ids(config.FOLDERED_SERVICE_NAME, 'data')

    sdk_tasks.kill_task_with_pattern('namenode', sdk_hosts.system_host(config.FOLDERED_SERVICE_NAME, 'name-0-node'))
    config.expect_recovery(service_name=config.FOLDERED_SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.FOLDERED_SERVICE_NAME, 'name', name_ids)
    sdk_tasks.check_tasks_not_updated(config.FOLDERED_SERVICE_NAME, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(config.FOLDERED_SERVICE_NAME, 'data', data_ids)
Example 12
def test_kill_journal_node():
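    # Kill the JournalNode process on journal-0-node and verify that only the
    # 'journal' tasks are replaced; 'name' and 'data' tasks keep their task IDs.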
    journal_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal-0')
    name_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'name')
    data_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data')

    sdk_tasks.kill_task_with_pattern('journalnode', sdk_hosts.system_host(FOLDERED_SERVICE_NAME, 'journal-0-node'))
    config.expect_recovery(service_name=FOLDERED_SERVICE_NAME)
    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'name', name_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
Example 13
def test_kill_journal_node():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal-0')
    name_ids = sdk_tasks.get_task_ids(foldered_name, 'name')
    data_ids = sdk_tasks.get_task_ids(foldered_name, 'data')

    sdk_tasks.kill_task_with_pattern('journalnode', sdk_hosts.system_host(foldered_name, 'journal-0-node'))
    config.expect_recovery(service_name=foldered_name)
    sdk_tasks.check_tasks_updated(foldered_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, 'name', name_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, 'data', data_ids)
Example 14
def test_master_reelection():
    initial_master = config.get_elasticsearch_master(service_name=foldered_name)
    shakedown.kill_process_on_host(sdk_hosts.system_host(foldered_name, initial_master), "master__.*Elasticsearch")
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)
    new_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert new_master.startswith("master") and new_master != initial_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example 15
def test_kill_data_node():
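    # Kill the DataNode process on data-0-node and verify that only the 'data'
    # tasks are replaced; 'journal' and 'name' tasks keep their task IDs.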
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    data_ids = sdk_tasks.get_task_ids(foldered_name, 'data-0')
    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal')
    name_ids = sdk_tasks.get_task_ids(foldered_name, 'name')

    sdk_cmd.kill_task_with_pattern('datanode', sdk_hosts.system_host(foldered_name, 'data-0-node'))
    config.expect_recovery(service_name=foldered_name)
    sdk_tasks.check_tasks_updated(foldered_name, 'data', data_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, 'name', name_ids)
Example 16
def test_kill_data_node():
    data_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data-0')
    journal_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    name_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'name')

    tasks.kill_task_with_pattern(
        'datanode', hosts.system_host(FOLDERED_SERVICE_NAME, 'data-0-node'))
    check_healthy()
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
    tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'journal',
                                  journal_ids)
    tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'name', name_ids)
Example 17
def test_losing_and_regaining_index_health(default_populated_index):
    check_elasticsearch_index_health(DEFAULT_INDEX_NAME,
                                     "green",
                                     service_name=FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(
        hosts.system_host(FOLDERED_SERVICE_NAME, "data-0-node"),
        "data__.*Elasticsearch")
    check_elasticsearch_index_health(DEFAULT_INDEX_NAME,
                                     "yellow",
                                     service_name=FOLDERED_SERVICE_NAME)
    check_elasticsearch_index_health(DEFAULT_INDEX_NAME,
                                     "green",
                                     service_name=FOLDERED_SERVICE_NAME)
Example 18
def test_kill_data_node():
    data_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data-0')
    journal_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    name_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'name')

    sdk_tasks.kill_task_with_pattern(
        'datanode', sdk_hosts.system_host(FOLDERED_SERVICE_NAME,
                                          'data-0-node'))
    expect_recovery()
    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'journal',
                                      journal_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'name', name_ids)
Example 19
def test_losing_and_regaining_index_health(default_populated_index):
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME,
                                            "green",
                                            service_name=foldered_name)
    shakedown.kill_process_on_host(
        sdk_hosts.system_host(foldered_name, "data-0-node"),
        "data__.*Elasticsearch")
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME,
                                            "yellow",
                                            service_name=foldered_name)
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME,
                                            "green",
                                            service_name=foldered_name)
Example 20
def test_integrity_on_name_node_failure():
    """
    The first name node (name-0-node) is the active name node by default when HDFS gets installed.
    This test checks that it is possible to write and read data after the first name node fails.
    """
    tasks.kill_task_with_pattern(
        "NameNode", hosts.system_host(FOLDERED_SERVICE_NAME, 'name-0-node'))
    time.sleep(1)  # give NameNode a chance to die

    write_some_data('data-0-node', TEST_FILE_2_NAME)

    read_some_data('data-2-node', TEST_FILE_2_NAME)

    check_healthy()
Example 21
def test_kill_essential():
    '''kill the essential task, verify that both tasks are relaunched against a matching executor'''
    verify_shared_executor('hello-0')

    old_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0')
    assert len(old_ids) == 2

    sdk_cmd.kill_task_with_pattern(
        'shared-volume/essential', # hardcoded in cmd, see yml
        sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-essential'))

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0', old_ids) # wait for ids to change...
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME) # ...and for tasks to be up and running

    # the first verify_shared_executor call deleted the files. both should have come back via the relaunch.
    verify_shared_executor('hello-0', delete_files=False) # leave files as-is for the next test
Example 22
def run_hdfs_command(task_name, command):
    """
    Go into the Data Node hdfs directory, set JAVA_HOME, and execute the command.
    """
    host = hosts.system_host(FOLDERED_SERVICE_NAME, task_name)
    java_home = find_java_home(host)

    # Find hdfs home directory by looking up the Data Node process.
    # Hdfs directory is found in an arg to the java command.
    hdfs_dir_cmd = """ps -ef | grep hdfs | grep DataNode \
        | awk 'BEGIN {RS=" "}; /-Dhadoop.home.dir/' | sed s/-Dhadoop.home.dir=//"""
    full_command = """cd $({}) &&
        export JAVA_HOME={} &&
        {}""".format(hdfs_dir_cmd, java_home, command)

    rc, output = shakedown.run_command_on_agent(host, full_command)
    return rc, output
Example 23
def test_config_update_while_partitioned():
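    # Partition the agent running world-0-server, push a CPU bump through Marathon
    # while the agent is unreachable, then reconnect and verify that the 'world'
    # tasks are restarted with the updated CPU allocation.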
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    host = sdk_hosts.system_host(config.SERVICE_NAME, "world-0-server")
    shakedown.partition_agent(host)

    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    updated_cpus = float(service_config['env']['WORLD_CPUS']) + 0.1
    service_config['env']['WORLD_CPUS'] = str(updated_cpus)
    sdk_marathon.update_app(config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)

    shakedown.reconnect_agent(host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.world_task_count(config.SERVICE_NAME)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
Example 24
def test_config_update_while_partitioned():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    host = sdk_hosts.system_host(config.SERVICE_NAME, "world-0-server")
    shakedown.partition_agent(host)

    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    updated_cpus = float(service_config['env']['WORLD_CPUS']) + 0.1
    service_config['env']['WORLD_CPUS'] = str(updated_cpus)
    sdk_marathon.update_app(config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)

    shakedown.reconnect_agent(host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.world_task_count(config.SERVICE_NAME)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
Example 25
def test_kill_agent():
    '''kill the agent task, verify that the agent task is relaunched against the same executor as before'''
    verify_shared_executor('hello-0')

    old_node_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-node')
    assert len(old_node_ids) == 1
    old_agent_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-agent')
    assert len(old_agent_ids) == 1

    sdk_cmd.kill_task_with_pattern(
        'agent-container-path/output',  # hardcoded in cmd, see yml
        sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-agent'))

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-node', old_node_ids)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-agent', old_agent_ids)

    # the first verify_shared_executor call deleted the files. only the nonessential file came back via its relaunch.
    verify_shared_executor('hello-0')
Example 26
def test_integrity_on_name_node_failure():
    """
    The first name node (name-0-node) is the active name node by default when HDFS gets installed.
    This test checks that it is possible to write and read data after the active name node fails,
    verifying that failover sustains the expected functionality.
    """
    active_name_node = config.get_active_name_node(config.SERVICE_NAME)
    sdk_tasks.kill_task_with_pattern("NameNode", sdk_hosts.system_host(config.SERVICE_NAME, active_name_node))

    predicted_active_name_node = "name-1-node"
    if active_name_node == "name-1-node":
        predicted_active_name_node = "name-0-node"

    wait_for_failover_to_complete(predicted_active_name_node)

    config.write_data_to_hdfs(config.SERVICE_NAME, config.TEST_FILE_2_NAME)
    config.read_data_from_hdfs(config.SERVICE_NAME, config.TEST_FILE_2_NAME)

    config.check_healthy(service_name=config.SERVICE_NAME)
Example 27
def test_kill_nonessential():
    '''kill the nonessential task, verify that the nonessential task is relaunched against the same executor as before'''
    verify_shared_executor('hello-0')

    old_essential_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-essential')
    assert len(old_essential_ids) == 1
    old_nonessential_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-nonessential')
    assert len(old_nonessential_ids) == 1

    sdk_cmd.kill_task_with_pattern(
        'shared-volume/nonessential', # hardcoded in cmd, see yml
        sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-nonessential'))

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-nonessential', old_nonessential_ids)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-essential', old_essential_ids)

    # the first verify_shared_executor call deleted the files. only the nonessential file came back via its relaunch.
    verify_shared_executor('hello-0', expected_files=['nonessential'])
Example 28
def test_integrity_on_name_node_failure():
    """
    The first name node (name-0-node) is the active name node by default when HDFS gets installed.
    This test checks that it is possible to write and read data after the active name node fails,
    verifying that failover sustains the expected functionality.
    """
    active_name_node = config.get_active_name_node(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern("NameNode", sdk_hosts.system_host(config.SERVICE_NAME, active_name_node))

    predicted_active_name_node = "name-1-node"
    if active_name_node == "name-1-node":
        predicted_active_name_node = "name-0-node"

    wait_for_failover_to_complete(predicted_active_name_node)

    test_filename = get_unique_filename("test_namenode_fail")
    config.write_data_to_hdfs(config.SERVICE_NAME, test_filename)
    config.read_data_from_hdfs(config.SERVICE_NAME, test_filename)

    config.check_healthy(service_name=config.SERVICE_NAME)
Example 29
def test_kill_agent():
    '''kill the agent task, verify that the agent task is relaunched against the same executor as before'''
    verify_shared_executor('hello-0')

    old_node_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-node')
    assert len(old_node_ids) == 1
    old_agent_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME,
                                           'hello-0-agent')
    assert len(old_agent_ids) == 1

    sdk_cmd.kill_task_with_pattern(
        'agent-container-path/output',  # hardcoded in cmd, see yml
        sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-agent'))

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-node',
                                      old_node_ids)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-agent',
                                  old_agent_ids)

    # the first verify_shared_executor call deleted the files. only the nonessential file came back via its relaunch.
    verify_shared_executor('hello-0')
Example 30
def test_partition():
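    # Partition and then reconnect the agent hosting hello-0-server, and verify
    # that the service is still running afterwards.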
    host = sdk_hosts.system_host(config.SERVICE_NAME, "hello-0-server")
    shakedown.partition_agent(host)
    shakedown.reconnect_agent(host)
    config.check_running()