def test_service_becomes_healthy_after_agent_is_partitioned():
    """Partition the broker's agent, reconnect it, then check service health."""

    def accept_any_result(_result):
        # Whatever the partition call returns is treated as success.
        return True, ''

    broker_host = get_broker_host()

    spin(shakedown.partition_agent, accept_any_result, broker_host)
    shakedown.reconnect_agent(broker_host)

    check_health()
def test_service_becomes_healthy_after_agent_is_partitioned():
    """Cut the broker host off from the cluster, restore it, and verify health."""
    agent_host = get_broker_host()

    def partition_done(_outcome):
        # The predicate never asks for a retry: any outcome is accepted.
        return (True, '')

    spin(shakedown.partition_agent, partition_done, agent_host)
    shakedown.reconnect_agent(agent_host)

    check_health()
Example #3
0
def test_lock():
    """Check that a second scheduler fails before it can modify ZK config.

    A second scheduler must fail to start while an existing scheduler is
    running.  Without locking it would only fail during registration, after
    having written its config to ZK, so the test compares the ZK config
    before and after the second instance is launched.
    """
    client = dcos.marathon.create_client()

    def failure_timestamp(app_info):
        # Timestamp of the app's last task failure, or None if there is none.
        return app_info.get("lastTaskFailure", {}).get("timestamp", None)

    # Snapshot the ZK config of the running framework.
    config_path = "dcos-service-{}/ConfigTarget".format(PACKAGE_NAME)
    config_before = shakedown.get_zk_node_data(config_path)

    # Fetch the marathon app and remember its latest failure timestamp.
    app_id = "/{}".format(PACKAGE_NAME)
    app = client.get_app(app_id)
    baseline_timestamp = failure_timestamp(app)

    # Drop the single-instance label, then scale to 2 instances.
    labels = app["labels"]
    labels.pop("MARATHON_SINGLE_INSTANCE_APP")
    client.update_app(app_id, {"labels": labels})
    shakedown.deployment_wait()
    client.update_app(app_id, {"instances": 2})

    # Wait for the second scheduler to fail: a new failure timestamp appears.
    def current_failure_timestamp():
        return failure_timestamp(client.get_app(app_id))

    def has_new_failure(timestamp):
        return (timestamp != baseline_timestamp,
                "second scheduler has not yet failed")

    spin(current_failure_timestamp, has_new_failure)

    # The ZK config must be untouched.
    config_after = shakedown.get_zk_node_data(config_path)
    assert config_before == config_after
Example #4
0
def write_messages():
    """Produce NUM_TEST_MSGS messages to TOPIC_NAME and verify the offsets.

    Kafka may not accept every message on the first attempt, so after the
    initial producer run the remaining count is re-checked (and topped up)
    through ``spin`` until nothing is left to write.
    """

    def produce_missing(target):
        # Returns how many messages were still missing when the offset was
        # read; a failed kafka command counts as "everything still missing".
        try:
            offsets = get_kafka_command(
                'topic offsets {}'.format(TOPIC_NAME))
            remaining = target - int(offsets[0]['0'])
            if remaining > 0:
                get_kafka_command('topic producer_test {} {}'.format(
                    TOPIC_NAME, remaining))
            assert remaining >= 0
            return remaining
        except RuntimeError:
            return target

    def nothing_left(remaining):
        return (remaining <= 0, 'producer_test continues....')

    initial_command = 'topic producer_test {} {}'.format(
        TOPIC_NAME, NUM_TEST_MSGS)
    get_kafka_command(initial_command)
    spin(produce_missing, nothing_left, NUM_TEST_MSGS)
    print('producer_test is successful {} msg available'.format(NUM_TEST_MSGS))

    # Final confirmation that the offsets match.
    check_offsets()
Example #5
0
def new_default_version_available(prev_version):
    """Wait until the package version differs from ``prev_version``.

    Args:
        prev_version: the version value to compare each newly fetched
            package version against.

    Returns:
        None. The wait is delegated to ``spin``; its retry and timeout
        behaviour is defined by that helper, which is not visible here.
    """

    def success_predicate(pkg_version):
        return (pkg_version != prev_version, 'Package version has not changed')

    # Hand get_pkg_version to spin directly so its return value reaches the
    # predicate. The previous wrapper called it but returned nothing, so the
    # predicate compared None against prev_version and never really waited.
    spin(get_pkg_version, success_predicate)
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    """Partition every service host, reconnect them all, then check health."""

    def accept_any_result(_result):
        # Whatever the partition call returns is treated as success.
        return True, ''

    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    # All hosts are partitioned before any of them is reconnected.
    for agent_host in service_hosts:
        spin(shakedown.partition_agent, accept_any_result, agent_host)
    for agent_host in service_hosts:
        shakedown.reconnect_agent(agent_host)

    check_health()
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    """Cut off all hosts running the service, restore them, verify health."""
    agent_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_done(_outcome):
        # The predicate never asks for a retry: any outcome is accepted.
        return (True, '')

    # First pass isolates every host; second pass brings each one back.
    for ip in agent_ips:
        spin(shakedown.partition_agent, partition_done, ip)
    for ip in agent_ips:
        shakedown.reconnect_agent(ip)

    check_health()
def topics_are_available():
    """Wait (via ``spin``) until the kafka 'topic list' command succeeds."""

    def can_list_topics():
        # A RuntimeError from the command means topics are not listable yet.
        try:
            get_kafka_command('topic list')
        except RuntimeError:
            return False
        return True

    def topics_listed(available):
        return available, 'Topics are not available'

    spin(can_list_topics, topics_listed)
Example #9
0
def destroy_service():
    """Delete the service's marathon app and wait until the service is gone.

    Issues a delete request for PACKAGE_NAME against the marathon 'apps'
    endpoint, then waits via ``spin`` until
    ``shakedown.get_service(PACKAGE_NAME)`` yields ``None``.
    """
    destroy_endpoint = marathon_api_url_with_param('apps', PACKAGE_NAME)
    request(dcos.http.delete, destroy_endpoint)

    # Make sure the scheduler has been destroyed
    def fn():
        # The lookup result must be returned: without it the predicate always
        # received None and the wait finished immediately.
        return shakedown.get_service(PACKAGE_NAME)

    def success_predicate(service):
        return (service is None, 'Service not destroyed')

    spin(fn, success_predicate)
def check_scheduler_health():
    """Wait until 'broker list' reports DEFAULT_BROKER_COUNT brokers.

    This confirms that the scheduler endpoint responds and that every broker
    is available.
    """

    def list_brokers():
        # An erroring command is treated as "no brokers yet".
        try:
            brokers = get_kafka_command('broker list')
        except RuntimeError:
            brokers = []
        return brokers

    def all_brokers_listed(brokers):
        all_up = len(brokers) == DEFAULT_BROKER_COUNT
        return all_up, 'Scheduler and all brokers not available'

    spin(list_brokers, all_brokers_listed)
Example #11
0
def test_is_suppressed():
    """Wait until the kafka 'suppressed' state property reads "true"."""
    suppressed_url = urllib.parse.urljoin(
        dcos.config.get_config_val('core.dcos_url'),
        'service/kafka/v1/state/properties/suppressed',
    )

    def fetch_suppressed_flag():
        # An HTTP error status raises here rather than being retried silently.
        reply = dcos.http.get(suppressed_url)
        reply.raise_for_status()
        return reply.text

    def is_suppressed(flag_text):
        return flag_text == "true", 'Waiting for supressed'

    spin(fetch_suppressed_flag, is_suppressed)
def check_offsets():
    """Verify that partition '0' of TOPIC_NAME is at offset NUM_TEST_MSGS.

    Waits for topics to be available first. The offsets command is retried
    while it raises RuntimeError; once it succeeds, a mismatching offset
    fails the assertion instead of being retried.
    """
    topics_are_available()

    def offset_matches():
        try:
            offsets = get_kafka_command(
                'topic offsets {}'.format(TOPIC_NAME))
            current = int(offsets[0]['0'])
            assert current == NUM_TEST_MSGS
            return True
        except RuntimeError:
            # The command failed; report that and let spin decide on a retry.
            return False

    def offset_was_read(read_ok):
        return read_ok, 'Unable to get offset'

    spin(offset_matches, offset_was_read)
Example #13
0
def task_id_changes(broker_name, task_id):
    """Wait until ``broker_name`` runs as one task whose id is not ``task_id``.

    Returns whatever ``spin`` returns for the final polling attempt.
    """

    def is_running_broker(task):
        return (task['state'] == TASK_RUNNING_STATE
                and task['name'] == broker_name)

    def running_broker_tasks():
        # An HTTP error while listing tasks counts as "no tasks found".
        try:
            service_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
            return [task for task in service_tasks if is_running_broker(task)]
        except dcos.errors.DCOSHTTPException:
            return []

    def id_has_changed(candidates):
        changed = len(candidates) == 1 and candidates[0]['id'] != task_id
        return changed, "Task ID didn't change."

    return spin(running_broker_tasks, id_has_changed)
def get_running_broker_task(broker_name):
    """Wait until exactly one running task named ``broker_name`` exists.

    Returns whatever ``spin`` returns for the final polling attempt.
    """

    def is_running_broker(task):
        return (task['state'] == TASK_RUNNING_STATE
                and task['name'] == broker_name)

    def running_broker_tasks():
        # An HTTP error while listing tasks counts as "no tasks found".
        try:
            service_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
            return [task for task in service_tasks if is_running_broker(task)]
        except dcos.errors.DCOSHTTPException:
            return []

    def exactly_one(candidates):
        return len(candidates) == 1, 'Failed to get task'

    return spin(running_broker_tasks, exactly_one)
def task_id_changes(broker_name, task_id):
    """Poll until the single running ``broker_name`` task has a new id.

    The result of ``spin`` for the final polling attempt is returned.
    """

    def find_running():
        try:
            found = []
            for task in shakedown.get_service_tasks(PACKAGE_NAME):
                if task['state'] == TASK_RUNNING_STATE:
                    if task['name'] == broker_name:
                        found.append(task)
            return found
        except dcos.errors.DCOSHTTPException:
            # The task listing failed; behave as if nothing is running.
            return []

    def replaced(found):
        is_new = len(found) == 1 and found[0]['id'] != task_id
        return (is_new, "Task ID didn't change.")

    return spin(find_running, replaced)
Example #16
0
def get_running_broker_task(broker_name):
    """Poll until there is a single running task called ``broker_name``.

    The result of ``spin`` for the final polling attempt is returned.
    """

    def find_running():
        try:
            found = []
            for task in shakedown.get_service_tasks(PACKAGE_NAME):
                if task['state'] == TASK_RUNNING_STATE:
                    if task['name'] == broker_name:
                        found.append(task)
            return found
        except dcos.errors.DCOSHTTPException:
            # The task listing failed; behave as if nothing is running.
            return []

    def single_match(found):
        return (len(found) == 1, 'Failed to get task')

    return spin(find_running, single_match)
def wait_for_deployment_lock_release():
    """Wait until marathon's deployments endpoint returns 200 and ``[]``.

    Returns whatever ``spin`` returns for the final polling attempt.
    """
    not_unlocked = 'Deployment was not unlocked'

    def fetch_deployments():
        return dcos.http.get(marathon_api_url('deployments'))

    def no_deployments(response):
        # The body is only parsed when the status is 200; an unparsable
        # body counts as "still locked".
        try:
            unlocked = response.status_code == 200 and response.json() == []
        except json.decoder.JSONDecodeError:
            return False, not_unlocked
        return unlocked, not_unlocked

    return spin(fetch_deployments, no_deployments)
Example #18
0
def test_failing_health_check(static_port_config):
    """Check that broker-0 is killed and comes back after its ZK node is deleted.

    Steps: wait for 'broker-2' to be running, record broker-0's task id,
    delete broker 0's id node in ZooKeeper from the master, wait for that
    task to be reported killed, then wait for a running 'broker-0' task.

    Args:
        static_port_config: not referenced in the body; presumably a pytest
            fixture requested for its setup side effects (confirm).
    """
    broker_id = '0'
    broker_name = 'broker-' + broker_id

    def found_broker(result):
        # Identity comparison is the idiomatic None check (PEP 8).
        return result is not None, 'Broker not found.'

    def broker_killed_result_checker(result):
        return result, 'Broker not killed.'

    print('Waiting for last Running Broker.')
    test_utils.spin(get_running_broker_task_id, found_broker, 'broker-2')

    # Get broker-0's task ID so we can know when it kills itself after failing
    # the health check.
    task_id = get_running_broker_task_id(broker_name)
    print("{}'s task_id is {}".format(broker_name, task_id))

    # Delete the ZK node which should trigger the health check to kill broker-0
    shakedown.run_command_on_master(
        'wget https://github.com/outbrain/zookeepercli/releases/'
        'download/v1.0.10/zookeepercli')
    shakedown.run_command_on_master('sudo chmod +x zookeepercli')
    shakedown.run_command_on_master(
        './zookeepercli --servers 127.0.0.1 -c delete '
        '/dcos-service-kafka/brokers/ids/' + broker_id)

    print('Waiting for Broker to fail.')
    test_utils.spin(broker_killed, broker_killed_result_checker, task_id)

    print('Waiting for Running Broker.')
    test_utils.spin(get_running_broker_task_id, found_broker, broker_name)
def test_failing_health_check(static_port_config):
    """Check that broker-0 is killed and comes back after its ZK node is deleted.

    Steps: wait for 'broker-2' to be running, record broker-0's task id,
    delete broker 0's id node in ZooKeeper from the master, wait for that
    task to be reported killed, then wait for a running 'broker-0' task.

    Args:
        static_port_config: not referenced in the body; presumably a pytest
            fixture requested for its setup side effects (confirm).
    """
    broker_id = '0'
    broker_name = 'broker-' + broker_id

    def found_broker(result):
        # Identity comparison is the idiomatic None check (PEP 8).
        return result is not None, 'Broker not found.'

    def broker_killed_result_checker(result):
        return result, 'Broker not killed.'

    print('Waiting for last Running Broker.')
    test_utils.spin(get_running_broker_task_id, found_broker, 'broker-2')

    # Get broker-0's task ID so we can know when it kills itself after failing
    # the health check.
    task_id = get_running_broker_task_id(broker_name)
    print("{}'s task_id is {}".format(broker_name, task_id))

    # Delete the ZK node which should trigger the health check to kill broker-0
    shakedown.run_command_on_master(
        'wget https://github.com/outbrain/zookeepercli/releases/'
        'download/v1.0.10/zookeepercli'
    )
    shakedown.run_command_on_master('sudo chmod +x zookeepercli')
    shakedown.run_command_on_master(
        './zookeepercli --servers 127.0.0.1 -c delete '
        '/dcos-service-kafka/brokers/ids/' + broker_id
    )

    print('Waiting for Broker to fail.')
    test_utils.spin(broker_killed, broker_killed_result_checker, task_id)

    print('Waiting for Running Broker.')
    test_utils.spin(get_running_broker_task_id, found_broker, broker_name)
def wait_for_deployment_lock_release():
    """Poll marathon's deployments endpoint until it answers 200 with ``[]``.

    The result of ``spin`` for the final polling attempt is returned.
    """

    def poll_deployments():
        return dcos.http.get(marathon_api_url('deployments'))

    def lock_released(reply):
        failure_note = 'Deployment was not unlocked'
        try:
            # json() is only reached for a 200 reply (short-circuit `and`).
            released = reply.status_code == 200 and reply.json() == []
        except json.decoder.JSONDecodeError:
            released = False
        return released, failure_note

    return spin(poll_deployments, lock_released)
def test_scheduler_connection_setup_is_correct():
    """Check the shape of the kafka 'connection' command output.

    Waits until the 'address' list holds DEFAULT_BROKER_COUNT entries, then
    asserts on the number of fields, the 'dns' list length and the
    'zookeeper' value.
    """

    def fetch_connection():
        return get_kafka_command('connection')

    def has_all_addresses(info):
        complete = len(info['address']) == DEFAULT_BROKER_COUNT
        return complete, 'Expected number of brokers never came online'

    connection_info = spin(fetch_connection, has_all_addresses)

    assert len(connection_info) == 4
    assert len(connection_info['dns']) == DEFAULT_BROKER_COUNT
    expected_zookeeper = 'master.mesos:2181/dcos-service-{}'.format(
        PACKAGE_NAME)
    assert connection_info['zookeeper'] == expected_zookeeper
def get_and_verify_plan(predicate=lambda r: True):
    """Fetch the kafka plan until its JSON body satisfies ``predicate``.

    Args:
        predicate: callable given the decoded JSON body of the plan
            response; a truthy result ends the wait. Defaults to accepting
            any body that decodes.

    Returns:
        The decoded JSON body of the response that ``spin`` returns.
    """

    def fn():
        return dcos.http.get(kafka_api_url('plan'))

    def success_predicate(result):
        message = 'Request to /plan failed'

        try:
            body = result.json()
        except Exception:
            # A body that cannot be decoded means "not ready yet". Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit still abort the wait.
            return False, message

        return predicate(body), message

    return spin(fn, success_predicate).json()
def get_and_verify_plan(predicate=lambda r: True):
    """Fetch the kafka plan until its JSON body satisfies ``predicate``.

    Args:
        predicate: callable given the decoded JSON body of the plan
            response; a truthy result ends the wait. Defaults to accepting
            any body that decodes.

    Returns:
        The decoded JSON body of the response that ``spin`` returns.
    """

    def fn():
        return dcos.http.get(kafka_api_url('plan'))

    def success_predicate(result):
        message = 'Request to /plan failed'

        try:
            body = result.json()
        except Exception:
            # A body that cannot be decoded means "not ready yet". Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit still abort the wait.
            return False, message

        return predicate(body), message

    return spin(fn, success_predicate).json()
def destroy_service():
    """Delete the service's marathon app and wait until the service is gone.

    The delete request is retried via ``spin`` while it raises
    DCOSHTTPException. Afterwards the function waits until
    ``shakedown.get_service(PACKAGE_NAME)`` yields ``None``.
    """
    destroy_endpoint = marathon_api_url_with_param('apps', PACKAGE_NAME)

    # Keep trying until marathon request succeeds
    def send_destroy_request():
        try:
            request(dcos.http.delete, destroy_endpoint)
            return True
        except dcos.errors.DCOSHTTPException:
            return False

    def request_succeeded(success):
        return (success, 'Destroy request failed')

    spin(send_destroy_request, request_succeeded)

    # Make sure the scheduler has been destroyed
    def lookup_service():
        # The lookup result must be returned: without it the predicate always
        # received None and the wait finished immediately.
        return shakedown.get_service(PACKAGE_NAME)

    def service_is_gone(service):
        return (service is None, 'Service not destroyed')

    spin(lookup_service, service_is_gone)
def test_scheduler_connection_setup_is_correct():
    """Validate the result of the kafka 'connection' command.

    Polls until 'address' has DEFAULT_BROKER_COUNT entries, then checks the
    field count, the 'dns' entry count and the 'zookeeper' path.
    """

    def query_connection():
        return get_kafka_command('connection')

    def brokers_online(reply):
        message = 'Expected number of brokers never came online'
        return (len(reply['address']) == DEFAULT_BROKER_COUNT, message)

    connection_info = spin(query_connection, brokers_online)

    assert len(connection_info) == 4
    assert len(connection_info['dns']) == DEFAULT_BROKER_COUNT
    zookeeper_path = 'master.mesos:2181/dcos-service-{}'.format(PACKAGE_NAME)
    assert connection_info['zookeeper'] == zookeeper_path
def get_connection_info():
    """Run 'kafka connection' until it reports 3 addresses; return the JSON.

    An attempt is rejected while marathon lists any deployment, while the
    command output is not JSON, or while the command reports an error.
    """

    def run_connection_command():
        return shakedown.run_dcos_command('kafka connection')

    def connection_ready(result):
        pending = dcos.http.get(marathon_api_url('deployments')).json()
        if pending:
            return False, 'Deployment is ongoing'

        output, error = result
        try:
            parsed = json.loads(output)
        except Exception:
            return False, 'Command did not return JSON'

        # The address list is only inspected when no error was reported.
        ready = not error and len(parsed['address']) == 3
        return (
            ready,
            'Command errored or expected number of brokers are not up',
        )

    final_result = spin(run_connection_command, connection_ready)
    return json.loads(final_result[0])
Example #27
0
def get_connection_info():
    """Return the parsed 'kafka connection' output once 3 brokers are listed.

    The command is retried via ``spin`` while a marathon deployment is in
    progress, the output is not valid JSON, or the command reported an error.
    """

    def kafka_connection():
        return shakedown.run_dcos_command('kafka connection')

    def brokers_listed(result):
        if dcos.http.get(marathon_api_url('deployments')).json():
            return False, 'Deployment is ongoing'

        stdout, error = result
        try:
            info = json.loads(stdout)
        except Exception:
            return False, 'Command did not return JSON'

        failure_note = (
            'Command errored or expected number of brokers are not up'
        )
        # Short-circuit keeps the 'address' lookup from running on error.
        return (not error and len(info['address']) == 3, failure_note)

    return json.loads(spin(kafka_connection, brokers_listed)[0])
Example #28
0
def tasks_updated(prefix, old_task_ids):
    """Wait until the tasks for ``prefix`` have all been replaced.

    An attempt succeeds when none of the current task ids appears in
    ``old_task_ids`` and there are at least as many current ids as old ones.
    Returns whatever ``spin`` returns for the final polling attempt.
    """

    def current_task_ids():
        # An HTTP error while fetching ids counts as "no tasks".
        try:
            return get_task_ids(prefix)
        except dcos.errors.DCOSHTTPException:
            return []

    def all_replaced(task_ids):
        print('Old task ids: ' + str(old_task_ids))
        print('New task ids: ' + str(task_ids))

        any_reused = False
        for task_id in task_ids:
            print('Checking ' + task_id)
            if task_id in old_task_ids:
                any_reused = True

        enough_tasks = len(task_ids) >= len(old_task_ids)

        print('Waiting for update to ' + prefix)
        updated = enough_tasks and not any_reused
        return (updated, 'Task type:' + prefix + ' not updated')

    return spin(current_task_ids, all_replaced)
 def partition():
     """Partition every host in ``hosts``, then reconnect every one of them.

     ``hosts`` is not defined in this function; it comes from an outer scope.
     """

     def accept_any_result(_result):
         # Whatever the partition call returns is treated as success.
         return True, ''

     for agent_host in hosts:
         spin(shakedown.partition_agent, accept_any_result, agent_host)
     for agent_host in hosts:
         shakedown.reconnect_agent(agent_host)
 def partition():
     """Isolate all ``hosts`` first, then bring each of them back.

     ``hosts`` is read from an outer scope, not defined here.
     """

     def partition_done(_outcome):
         # The predicate never asks for a retry: any outcome is accepted.
         return (True, '')

     for ip in hosts:
         spin(shakedown.partition_agent, partition_done, ip)
     for ip in hosts:
         shakedown.reconnect_agent(ip)