def test_mom_with_network_failure_bounce_master(): """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by knocking out ports.""" # get MoM ip mom_ip = common.ip_of_mom() logger.info("MoM IP: {}".format(mom_ip)) app_def = apps.sleep_app() app_id = app_def["id"] with shakedown.marathon_on_marathon(): client = marathon.create_client() client.add_app(app_def) shakedown.wait_for_task("marathon-user", app_id.lstrip('/')) tasks = client.get_tasks(app_id) original_task_id = tasks[0]["id"] task_ip = tasks[0]['host'] logger.info("\nTask IP: " + task_ip) # PR for network partitioning in shakedown makes this better # take out the net partition_agent(mom_ip) partition_agent(task_ip) # wait for a min time.sleep(timedelta(minutes=1).total_seconds()) # bounce master run_command_on_master("sudo systemctl restart dcos-mesos-master") # bring the net up reconnect_agent(mom_ip) reconnect_agent(task_ip) time.sleep(timedelta(minutes=1).total_seconds()) common.wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds(), path="ping") with shakedown.marathon_on_marathon(): client = marathon.create_client() shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds()) @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception) def check_task_is_back(): tasks = client.get_tasks(app_id) assert tasks[0][ 'id'] == original_task_id, "The task ID has changed" check_task_is_back()
def start_master_http_service(port=7777, pid_file='python_http.pid'):
    """Start a background HTTP server on the master leader.

    The main purpose is to serve up artifacts for launched test
    applications: copy tests or artifacts to the leader first, then
    configure the Mesos task to fetch from
    http://master.mesos:7777/artifact.tar (be careful in a multi-master
    environment).

    :param port: port to use for the http service
    :param pid_file: file on the master that receives the server's PID
    :return: pid_file
    """
    serve_cmd = (
        'nohup /opt/mesosphere/bin/python -m http.server {} > http.log 2>&1 & '
        'echo $! > {}'.format(port, pid_file))
    run_command_on_master(serve_cmd)
    return pid_file
def test_docker_dns_mapping(marathon_service_name):
    """Tests that a running Docker task is accessible via DNS."""
    app_def = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    # A deliberately bogus hostname must NOT resolve.
    status, output = run_command_on_master('ping -c 1 docker-test.marathon-user.mesos-bad')
    assert not status

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_dns():
        # The app's real DNS name must resolve and be pingable from the master.
        dnsname = '{}.{}.mesos'.format(app_id.lstrip('/'), marathon_service_name)
        cmd = 'ping -c 1 {}'.format(dnsname)
        wait_for_dns(dnsname)
        ping_ok, _ = run_command_on_master(cmd)
        assert ping_ok, "ping failed for app using DNS lookup: {}".format(dnsname)

    check_dns()
def assert_http_code(url, http_code='200'):
    """Curl *url* from the master and assert the returned HTTP status code.

    :param url: URL to request
    :param http_code: expected status code as a string (default '200')
    """
    # -w "%{http_code}" makes curl print only the status code.
    curl_cmd = r'curl -s -o /dev/null -w "%{http_code}"' + ' {}'.format(url)
    ok, code = run_command_on_master(curl_cmd)
    assert ok, "{} failed".format(curl_cmd)
    assert code == http_code, "Got {} status code".format(code)
def check_dns():
    """Wait for the app's Mesos-DNS name, then ping it from the master.

    Closes over ``app_id`` and ``marathon_service_name`` from the
    enclosing scope.
    """
    dnsname = '{}.{}.mesos'.format(app_id.lstrip('/'), marathon_service_name)
    ping_cmd = 'ping -c 1 {}'.format(dnsname)
    wait_for_dns(dnsname)
    reachable, _ = run_command_on_master(ping_cmd)
    assert reachable, "ping failed for app using DNS lookup: {}".format(dnsname)
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports.

    Partitions both the MoM host and the task's host, restarts the Mesos
    master while the network is down, then reconnects and verifies the
    original task is still running under the same task ID.
    """
    # Locate MoM and deploy a sleeper app onto it.
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        wait_for_task("marathon-user", app_id.lstrip('/'))
        app_tasks = client.get_tasks(app_id)
        task_id_before = app_tasks[0]["id"]
        task_ip = app_tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better

    # Knock out the network on both involved agents.
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # Let the partition take effect for a minute.
    one_minute = timedelta(minutes=1).total_seconds()
    time.sleep(one_minute)

    # Bounce the Mesos master while the agents are unreachable.
    run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # Heal the network.
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(one_minute)
    wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds(), path="ping")

    with marathon_on_marathon() as client:
        wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The task must have survived with an unchanged ID.
            surviving = client.get_tasks(app_id)
            assert surviving[0]['id'] == task_id_before, "The task ID has changed"

        check_task_is_back()
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Backup and restore testing is done with only one master since the new
    master has to be able to read the backup file that was created by the
    previous master, and the easiest way to test that is when there is 1
    master.
    """
    backup_file = 'backup.tar'
    backup_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app. It is expected to be there after leader reelection
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    task_id = app['tasks'][0]['id']

    # Abdicate the leader with backup and restore
    original_leader = marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    params = '?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE /v2/leader{}'.format(params))
    common.abdicate_marathon_leader(params)

    # Wait for new leader (but same master server) to be up and ready
    common.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds(), path="ping")

    # The app and its task must survive the leader bounce untouched.
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    assert task_id == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Check that the backup file exists and is a valid, non-empty tar archive
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Backup/restore is tested against a single master: the new leader must
    be able to read the backup file written by the previous leader, which is
    simplest to verify when there is exactly 1 master.
    """
    backup_dir = '/tmp'
    backup_file = 'backup.tar'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app; it is expected to survive leader reelection.
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    running = app['tasksRunning']
    assert running == 1, "The number of running tasks is {}, but 1 was expected".format(running)
    task_id_before = app['tasks'][0]['id']

    # Abdicate the leader, asking Marathon to back up and then restore state.
    original_leader = marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    params = '?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE /v2/leader{}'.format(params))
    common.abdicate_marathon_leader(params)

    # Wait until the new leader (same master host) answers pings.
    wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds(), path="ping")

    # The app and its task must have survived the leader bounce untouched.
    app = client.get_app(app_id)
    running = app['tasksRunning']
    assert running == 1, "The number of running tasks is {}, but 1 was expected".format(running)
    assert task_id_before == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Validate the backup archive: it must exist and contain >= 1 entry.
    list_cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, entry_count = run_command_on_master(list_cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(entry_count.rstrip()) > 0, "Backup file is empty"
def test_docker_dns_mapping(marathon_service_name):
    """A running Docker task must be resolvable and reachable via Mesos-DNS."""
    app_def = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    # Sanity check: a nonsense domain must not resolve.
    bad_cmd = 'ping -c 1 docker-test.marathon-user.mesos-bad'
    status, output = run_command_on_master(bad_cmd)
    assert not status

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_dns():
        dnsname = '{}.{}.mesos'.format(app_id.lstrip('/'), marathon_service_name)
        wait_for_dns(dnsname)
        resolved, _ = run_command_on_master('ping -c 1 {}'.format(dnsname))
        assert resolved, "ping failed for app using DNS lookup: {}".format(dnsname)

    check_dns()
def check_data(port, path, expected):
    """Curl ``host:port/path/foo`` on the master and assert *expected* is in the body.

    Relies on ``host`` from the enclosing scope.

    :param port: port of the HTTP endpoint
    :param path: path component of the URL
    :param expected: substring that must appear in the response body
    """
    cmd = "curl {}:{}/{}/foo".format(host, port, path)
    run, data = run_command_on_master(cmd)
    assert run, "{} did not succeed".format(cmd)
    # Fixed: failure message had a stray trailing 'n' ("...'{}'n"), a typo'd
    # newline escape that rendered as a literal n in the assertion output.
    assert expected in data, "{} not found in '{}'".format(expected, data)
def value_check():
    """Run the closed-over ``cmd`` on the master; output must equal ``secret_value``."""
    succeeded, payload = run_command_on_master(cmd)
    assert succeeded, "{} did not succeed".format(cmd)
    assert payload.rstrip() == secret_value
def http_output_check():
    """Curl the closed-over ``relay_url`` and verify the ping/relay markers appear."""
    ok, body = run_command_on_master('curl {}'.format(relay_url))
    assert ok, "curl {} failed on master with {}".format(relay_url, body)
    assert 'Pong {}'.format(pinger_app["id"]) in body
    assert 'Relay from {}'.format(relay_app["id"]) in body
def restart_master_node():
    """Reboot the master node immediately via ``shutdown -r now``."""
    reboot_cmd = "sudo /sbin/shutdown -r now"
    run_command_on_master(reboot_cmd)
def systemctl_master(command='restart'):
    """Run ``systemctl <command>`` against the dcos-mesos-master unit on the master.

    :param command: systemctl verb, e.g. 'start', 'stop' or 'restart' (default)
    """
    unit_cmd = 'sudo systemctl {} dcos-mesos-master'.format(command)
    run_command_on_master(unit_cmd)
def value_check():
    """Execute the closed-over ``cmd`` on the master and compare against ``secret_value``."""
    ok, data = run_command_on_master(cmd)
    assert ok, "{} did not succeed. status = {}, data = {}".format(cmd, ok, data)
    assert data.rstrip() == secret_value, "Got an unexpected secret data"
def check_dns():
    """Build the app's DNS name, wait for it, then ping it from the master.

    Uses ``app_id`` and ``marathon_service_name`` from the enclosing scope.
    """
    hostname = '{}.{}.mesos'.format(app_id.lstrip('/'), marathon_service_name)
    cmd = 'ping -c 1 {}'.format(hostname)
    wait_for_dns(hostname)
    ping_ok, _ = run_command_on_master(cmd)
    assert ping_ok, "ping failed for app using DNS lookup: {}".format(hostname)
def systemctl_master(command='restart'):
    """Start, stop or restart the dcos-mesos-master process on the master node.

    :param command: systemctl verb to apply (default 'restart')
    """
    systemctl_cmd = 'sudo systemctl {} dcos-mesos-master'.format(command)
    run_command_on_master(systemctl_cmd)
def systemctl_master(command='restart'):
    """Apply a systemctl *command* to the dcos-mesos-master unit on the master.

    :param command: one of 'start', 'stop', 'restart' (default)
    """
    # Delegate the actual execution to the master node over SSH.
    run_command_on_master('sudo systemctl {} dcos-mesos-master'.format(command))
def check_task(cmd, target_data):
    """Run *cmd* on the master and assert *target_data* appears in its output."""
    succeeded, output = run_command_on_master(cmd)
    assert succeeded, "{} did not succeed".format(cmd)
    assert target_data in output, "'{}' not found in {}".format(target_data, output)
def check_http_endpoint(port, path):
    """Curl ``host:port/path/foo`` on the master; the body must be exactly 'hello\\n'.

    Uses ``host`` from the enclosing scope.
    """
    curl_cmd = "curl {}:{}/{}/foo".format(host, port, path)
    ok, body = run_command_on_master(curl_cmd)
    assert ok, "{} did not succeed".format(curl_cmd)
    assert body == 'hello\n', "'{}' was not equal to hello\\n".format(body)
def http_output_check():
    """Fetch ``relay_url`` from the master; the response must show the full ping/relay chain."""
    curl_cmd = 'curl {}'.format(relay_url)
    succeeded, response = run_command_on_master(curl_cmd)
    assert succeeded, "curl {} failed on master with {}".format(relay_url, response)
    expected_pong = 'Pong {}'.format(pinger_app["id"])
    expected_relay = 'Relay from {}'.format(relay_app["id"])
    assert expected_pong in response
    assert expected_relay in response
def value_check():
    """Run the closed-over ``cmd`` on the master and check the secret round-trips."""
    succeeded, payload = run_command_on_master(cmd)
    assert succeeded, "{} did not succeed. status = {}, data = {}".format(cmd, succeeded, payload)
    assert payload.rstrip() == secret_value, "Got an unexpected secret data"
def check_task(cmd, target_data):
    """Run *cmd* on the master and assert its output equals *target_data* exactly."""
    ok, result = run_command_on_master(cmd)
    assert ok, "{} did not succeed".format(cmd)
    assert result == target_data, "'{}' was not equal to {}".format(result, target_data)
def check_data(port, path):
    """Curl ``host:port/path/foo`` on the master; the body must contain two 'hello' lines.

    Relies on ``host`` from the enclosing scope.

    :param port: port of the HTTP endpoint
    :param path: path component of the URL
    """
    cmd = "curl {}:{}/{}/foo".format(host, port, path)
    run, data = run_command_on_master(cmd)
    assert run, "{} did not succeed".format(cmd)
    # Fixed: failure message ended in a stray 'n' ("...'{}'n"), a typo'd
    # newline escape; removed so the assertion message reads correctly.
    assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'".format(data)
def check_task(cmd, target_data):
    """Execute *cmd* on the master; its output must match *target_data* exactly."""
    success, captured = run_command_on_master(cmd)
    assert success, "{} did not succeed".format(cmd)
    assert captured == target_data, "'{}' was not equal to {}".format(captured, target_data)
def check_task(cmd, target_data):
    """Execute *cmd* on the master and require *target_data* somewhere in the output."""
    status, out = run_command_on_master(cmd)
    assert status, "{} did not succeed".format(cmd)
    assert target_data in out, "'{}' not found in {}".format(target_data, out)
def check_http_endpoint(port, path, expected):
    """Curl ``host:port/path/foo`` on the master and assert *expected* is in the body.

    Relies on ``host`` from the enclosing scope.

    :param port: port of the HTTP endpoint
    :param path: path component of the URL
    :param expected: substring that must appear in the response body
    """
    cmd = "curl {}:{}/{}/foo".format(host, port, path)
    run, data = run_command_on_master(cmd)
    assert run, "{} did not succeed".format(cmd)
    # Fixed: the failure message previously swapped its arguments
    # (.format(data, expected)), reporting "'<body>' was not found in
    # '<expected>'" — backwards. Report the missing needle, then the haystack.
    assert expected in data, "'{}' was not found in '{}'".format(expected, data)