Ejemplo n.º 1
0
def create_marathon_dns_app(app_id: str, healthcheck_app_id: str) -> dict:
    # DNS resolution app to make sure DNS is available during the upgrade.
    # Periodically resolves the healthcheck app's domain name and logs whether
    # it succeeded to a file in the Mesos sandbox.
    return {
        "id": '/' + app_id,
        "cmd": """
while true
do
    printf "%s " $(date --utc -Iseconds) >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    if host -W $TIMEOUT_SECONDS $RESOLVE_NAME
    then
        echo SUCCESS >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    else
        echo FAILURE >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    fi
    sleep $INTERVAL_SECONDS
done
""",
        "env": {
            'RESOLVE_NAME':
            marathon_app_id_to_mesos_dns_subdomain(healthcheck_app_id) +
            '.marathon.mesos',
            'DNS_LOG_FILENAME':
            'dns_resolve_log.txt',
            'INTERVAL_SECONDS':
            '1',
            'TIMEOUT_SECONDS':
            '1',
        },
        "cpus": 0.5,
        "mem": 32.0,
        "instances": 1,
        "container": {
            "type": "DOCKER",
            "docker": {
                "image": "branden/bind-utils",
                "network": "BRIDGE",
            }
        },
        "dependencies": [healthcheck_app_id],
    }
Ejemplo n.º 2
0
def create_marathon_dns_app(app_id: str, healthcheck_app_id: str) -> dict:
    # DNS resolution app to make sure DNS is available during the upgrade.
    # Periodically resolves the healthcheck app's domain name and logs whether
    # it succeeded to a file in the Mesos sandbox.
    return {
        "id": '/' + app_id,
        "cmd": """
while true
do
    printf "%s " $(date --utc -Iseconds) >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    if host -W $TIMEOUT_SECONDS $RESOLVE_NAME
    then
        echo SUCCESS >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    else
        echo FAILURE >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    fi
    sleep $INTERVAL_SECONDS
done
""",
        "env": {
            'RESOLVE_NAME': marathon_app_id_to_mesos_dns_subdomain(healthcheck_app_id) + '.marathon.mesos',
            'DNS_LOG_FILENAME': 'dns_resolve_log.txt',
            'INTERVAL_SECONDS': '1',
            'TIMEOUT_SECONDS': '1',
        },
        "cpus": 0.5,
        "mem": 32.0,
        "instances": 1,
        "container": {
            "type": "DOCKER",
            "docker": {
                "image": "branden/bind-utils",
                "network": "BRIDGE",
            }
        },
        "dependencies": [healthcheck_app_id],
    }
Ejemplo n.º 3
0
def cluster_workload(cluster_api):
    """Start a cluster workload on entry and verify its health on exit."""
    def app_task_ids(app_id):
        """Return a list of Mesos task IDs for app_id's running tasks."""
        assert app_id.startswith('/')
        response = cluster_api.marathon.get('/v2/apps' + app_id + '/tasks')
        response.raise_for_status()
        tasks = response.json()['tasks']
        return [task['id'] for task in tasks]

    def parse_dns_log(dns_log_content):
        """Return a list of (timestamp, status) tuples from dns_log_content."""
        dns_log = [
            line.strip().split(' ')
            for line in dns_log_content.strip().split('\n')
        ]
        if any(
                len(entry) != 2 or entry[1] not in ['SUCCESS', 'FAILURE']
                for entry in dns_log):
            message = 'Malformed DNS log.'
            log.debug(message + ' DNS log content:\n' + dns_log_content)
            raise Exception(message)
        return dns_log

    @retrying.retry(wait_fixed=(1 * 1000),
                    stop_max_delay=(120 * 1000),
                    retry_on_result=lambda x: not x)
    def wait_for_dns(hostname):
        """Return True if Mesos-DNS has at least one entry for hostname."""
        hosts = cluster_api.get('/mesos_dns/v1/hosts/' + hostname).json()
        return any(h['host'] != '' and h['ip'] != '' for h in hosts)

    test_id = uuid.uuid4().hex

    # HTTP healthcheck app to make sure tasks are reachable during the upgrade.
    # If a task fails its healthcheck, Marathon will terminate it and we'll
    # notice it was killed when we check tasks on exit.
    healthcheck_app = {
        "id":
        '/' + TEST_APP_NAME_FMT.format('healthcheck-' + test_id),
        "cmd":
        "python3 -m http.server 8080",
        "cpus":
        0.5,
        "mem":
        32.0,
        "instances":
        1,
        "container": {
            "type": "DOCKER",
            "docker": {
                "image": "python:3",
                "network": "BRIDGE",
                "portMappings": [{
                    "containerPort": 8080,
                    "hostPort": 0
                }]
            }
        },
        "healthChecks": [{
            "protocol": "HTTP",
            "path": "/",
            "portIndex": 0,
            "gracePeriodSeconds": 5,
            "intervalSeconds": 1,
            "timeoutSeconds": 5,
            "maxConsecutiveFailures": 1
        }],
    }

    # DNS resolution app to make sure DNS is available during the upgrade.
    # Periodically resolves the healthcheck app's domain name and logs whether
    # it succeeded to a file in the Mesos sandbox.
    dns_app = {
        "id": '/' + TEST_APP_NAME_FMT.format('dns-' + test_id),
        "cmd": """
while true
do
    printf "%s " $(date --utc -Iseconds) >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    if host -W $TIMEOUT_SECONDS $RESOLVE_NAME
    then
        echo SUCCESS >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    else
        echo FAILURE >> $MESOS_SANDBOX/$DNS_LOG_FILENAME
    fi
    sleep $INTERVAL_SECONDS
done
""",
        "env": {
            'RESOLVE_NAME':
            marathon_app_id_to_mesos_dns_subdomain(healthcheck_app['id']) +
            '.marathon.mesos',
            'DNS_LOG_FILENAME':
            'dns_resolve_log.txt',
            'INTERVAL_SECONDS':
            '1',
            'TIMEOUT_SECONDS':
            '1',
        },
        "cpus": 0.5,
        "mem": 32.0,
        "instances": 1,
        "container": {
            "type": "DOCKER",
            "docker": {
                "image": "branden/bind-utils",
                "network": "BRIDGE",
            }
        },
        "dependencies": [healthcheck_app['id']],
    }

    # Deploy test apps.
    # TODO(branden): We ought to be able to deploy these apps concurrently. See
    # https://mesosphere.atlassian.net/browse/DCOS-13360.
    cluster_api.marathon.deploy_app(healthcheck_app)
    cluster_api.marathon.ensure_deployments_complete()
    # This is a hack to make sure we don't deploy dns_app before the name it's
    # trying to resolve is available.
    wait_for_dns(dns_app['env']['RESOLVE_NAME'])
    cluster_api.marathon.deploy_app(dns_app, check_health=False)
    cluster_api.marathon.ensure_deployments_complete()

    test_apps = [healthcheck_app, dns_app]
    test_app_ids = [app['id'] for app in test_apps]

    tasks_start = {
        app_id: sorted(app_task_ids(app_id))
        for app_id in test_app_ids
    }
    log.debug('Test app tasks at start:\n' + pprint.pformat(tasks_start))

    for app in test_apps:
        assert app['instances'] == len(tasks_start[app['id']])

    yield

    if not cluster_api.marathon.get('/v2/apps').json()['apps']:
        raise Exception('No Marathon apps running!')

    tasks_end = {
        app_id: sorted(app_task_ids(app_id))
        for app_id in test_app_ids
    }
    log.debug('Test app tasks at end:\n' + pprint.pformat(tasks_end))

    # Verify that the tasks we started are still running.
    assert tasks_start == tasks_end

    # Verify DNS didn't fail.
    marathon_framework_id = cluster_api.marathon.get(
        '/v2/info').json()['frameworkId']
    dns_app_task = cluster_api.marathon.get('/v2/apps' + dns_app['id'] +
                                            '/tasks').json()['tasks'][0]
    dns_log = parse_dns_log(
        cluster_api.mesos_sandbox_file(
            dns_app_task['slaveId'],
            marathon_framework_id,
            dns_app_task['id'],
            dns_app['env']['DNS_LOG_FILENAME'],
        ))
    dns_failure_times = [
        entry[0] for entry in dns_log if entry[1] != 'SUCCESS'
    ]
    if len(dns_failure_times) > 0:
        message = 'Failed to resolve Marathon app hostname {} at least once.'.format(
            dns_app['env']['RESOLVE_NAME'])
        log.debug(message + ' Hostname failed to resolve at these times:\n' +
                  '\n'.join(dns_failure_times))
Ejemplo n.º 4
0
def test_marathon_app_id_to_mesos_dns_subdomain():
    assert helpers.marathon_app_id_to_mesos_dns_subdomain('/app-1') == 'app-1'
    assert helpers.marathon_app_id_to_mesos_dns_subdomain('app-1') == 'app-1'
    assert helpers.marathon_app_id_to_mesos_dns_subdomain(
        '/group-1/app-1') == 'app-1-group-1'
Ejemplo n.º 5
0
def test_marathon_app_id_to_mesos_dns_subdomain():
    assert helpers.marathon_app_id_to_mesos_dns_subdomain('/app-1') == 'app-1'
    assert helpers.marathon_app_id_to_mesos_dns_subdomain('app-1') == 'app-1'
    assert helpers.marathon_app_id_to_mesos_dns_subdomain('/group-1/app-1') == 'app-1-group-1'