def setup_constraint_switch():
    """Install the service pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the agent the hello
        pod was first constrained to, the agent the constraint was switched
        to, and the ``hello`` task ids read before the switch was applied.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = "[[\"hostname\", \"LIKE\", \"{}\"]]"

    # First, we stick the pod to some_agent
    hello_options = {
        "count": 1,
        "placement": placement_template.format(some_agent),
    }
    options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": hello_options,
        "world": {"count": 0},
    })

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to other_agent by rewriting HELLO_PLACEMENT in the app's env
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['HELLO_PLACEMENT'] = placement_template.format(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def __distribute_docker_credentials_file(file_name='docker.tar.gz'):
    """
    Copy the docker credentials file `file_name` to every private agent.
    Used to access private docker repositories in tests.
    """
    private_agents = shakedown.get_private_agents()

    # Upload the credentials archive to all private agents
    for agent in private_agents:
        shakedown.copy_file(agent, file_name)
def setup_module(module):
    """Wait for Metronome and Cosmos, require two private agents, then remove jobs."""
    common.wait_for_metronome()
    common.wait_for_cosmos()

    # The tests in this module need at least two private agents.
    agent_count = len(shakedown.get_private_agents())
    assert agent_count >= 2, "Incorrect Agent count"

    remove_jobs()
def setup_constraint_switch():
    """Install the package pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the agent the hello
        pod was first constrained to, the agent the constraint was switched
        to, and the ``hello`` task ids read before the switch was applied.
    """
    install.uninstall(PACKAGE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    print("agents", some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = 'hostname:LIKE:{}'

    # First, we stick the pod to some_agent
    hello_options = {
        "count": 1,
        "placement": placement_template.format(some_agent),
    }
    options = {
        "service": {"spec_file": "examples/marathon_constraint.yml"},
        "hello": hello_options,
        "world": {"count": 0},
    }

    install.install(PACKAGE_NAME, 1, additional_options=options)
    tasks.check_running(PACKAGE_NAME, 1)
    hello_ids = tasks.get_task_ids(PACKAGE_NAME, 'hello')

    # Now, stick it to other_agent by rewriting HELLO_PLACEMENT in the app's env
    app_config = marathon.get_config(PACKAGE_NAME)
    app_config['env']['HELLO_PLACEMENT'] = placement_template.format(other_agent)
    marathon.update_app(PACKAGE_NAME, app_config)

    # Wait for the scheduler to be up and settled before advancing.
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    return some_agent, other_agent, hello_ids
def prefetch_docker_image_on_private_agents(
        image,
        timeout=timedelta(minutes=5).total_seconds()):
    """ Deploy a throw-away app so the docker image gets pulled on the agents.

        The app `/prefetch` is created with as many instances as there are
        private agents, with the intent that the image ends up prefetched on
        all nodes. Once the deployment wait returns, `shakedown.delete_all_apps()`
        is called and a second deployment wait follows.

        :param image: docker image name
        :type image: str
        :param timeout: timeout for each deployment wait in secs (default: 5m)
        :type timeout: int or float
    """
    instance_count = len(shakedown.get_private_agents())
    docker_container = {
        "type": "DOCKER",
        "docker": {"image": image},
    }
    prefetch_app = {
        "id": "/prefetch",
        "instances": instance_count,
        "container": docker_container,
        "cpus": 0.1,
        "mem": 128,
    }

    marathon.create_client().add_app(prefetch_app)
    shakedown.deployment_wait(timeout)

    # Clean up again; note that this removes every app, not only /prefetch.
    shakedown.delete_all_apps()
    shakedown.deployment_wait(timeout)
def cluster_info():
    """Print the DC/OS version and mode, the private agent count and the
    value returned by `metronome_version()`.
    """
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))

    agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(agents)))

    # The value comes from metronome_version(), so label it as Metronome
    # (it was previously printed under a "Marathon version" label).
    about = metronome_version()
    print("Metronome version: {}".format(about))
def setup_module(module):
    """Wait for Metronome and Cosmos, print cluster info, require two private
    agents, then remove jobs.
    """
    common.wait_for_metronome()
    common.wait_for_cosmos()
    common.cluster_info()

    # The tests in this module need at least two private agents.
    agents = shakedown.get_private_agents()
    assert len(agents) >= 2, f"Incorrect Agent count. Expecting at least 2 agents, but have {len(agents)}"

    remove_jobs()
def ip_other_than_mom():
    """Return the first private agent that differs from `ip_of_mom()`.

    Returns None when no such agent exists.
    """
    mom_ip = ip_of_mom()
    candidates = (agent for agent in shakedown.get_private_agents() if agent != mom_ip)
    return next(candidates, None)
def ensure_docker_credentials():
    """Create the docker credentials file from the environment and copy it to
    all private agents.

    Raises AssertionError when either environment variable is missing.
    """
    # Docker username and password should be passed as environment variables `DOCKER_HUB_USERNAME`
    # and `DOCKER_HUB_PASSWORD` (usually by jenkins)
    env = os.environ
    assert 'DOCKER_HUB_USERNAME' in env, "Couldn't find docker hub username. $DOCKER_HUB_USERNAME is not set"
    assert 'DOCKER_HUB_PASSWORD' in env, "Couldn't find docker hub password. $DOCKER_HUB_PASSWORD is not set"

    common.create_docker_credentials_file(env['DOCKER_HUB_USERNAME'], env['DOCKER_HUB_PASSWORD'])
    common.copy_docker_credentials_file(shakedown.get_private_agents())
def test_external_volume():
    """Deploy an app with a uniquely named external volume, stop it, scale it
    back up and remove it, asserting the tasks are running and healthy after
    each start.

    The volume is always cleaned up afterwards on a best-effort basis: removal
    failures are only logged, never raised.
    """
    volume_name = "marathon-si-test-vol-{}".format(uuid.uuid4().hex)
    app_def = apps.external_volume_mesos_app()
    app_def["container"]["volumes"][0]["external"]["name"] = volume_name
    app_id = app_def['id']

    # Tested with root marathon since MoM doesn't have
    # --enable_features external_volumes option activated.
    # First deployment should create the volume since it has a unique name
    try:
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

        # Create the app: the volume should be successfully created
        common.assert_app_tasks_running(client, app_def)
        common.assert_app_tasks_healthy(client, app_def)

        # Scale down to 0
        client.stop_app(app_id)
        shakedown.deployment_wait()

        # Scale up again: the volume should be successfully reused
        client.scale_app(app_id, 1)
        shakedown.deployment_wait()

        common.assert_app_tasks_running(client, app_def)
        common.assert_app_tasks_healthy(client, app_def)

        # Remove the app to be able to remove the volume
        client.remove_app(app_id)
        shakedown.deployment_wait()
    except Exception as e:
        print('Fail to test external volumes: {}'.format(e))
        raise
    finally:
        # Clean up after the test: external volumes are not destroyed by marathon or dcos
        # and have to be cleaned manually.
        cmd = 'sudo /opt/mesosphere/bin/dvdcli remove --volumedriver=rexray --volumename={}'.format(
            volume_name)
        removed = False
        for agent in shakedown.get_private_agents():
            status, output = shakedown.run_command_on_agent(agent, cmd)  # NOQA
            if status:
                removed = True
            else:
                # Report a failure only for agents where the command really failed.
                print(
                    'DEBUG: Failed to remove external volume with name={} on agent={}: {}'
                    .format(volume_name, agent, output))

        # Note: Removing the volume might fail sometimes because EC2 takes some time (~10min) to recognize that
        # the volume is not in use anymore hence preventing its removal. This is a known pitfall: we log the error
        # and the volume should be cleaned up manually later.
        if not removed:
            print('WARNING: Failed to remove external volume with name={}'.
                  format(volume_name))
def test_external_volume():
    """Deploy an app with a uniquely named external volume, stop it, scale it
    back up and remove it, asserting the tasks are running and healthy after
    each start.

    The volume is always cleaned up afterwards on a best-effort basis: removal
    failures are only logged, never raised.
    """
    volume_name = "marathon-si-test-vol-{}".format(uuid.uuid4().hex)
    app_def = apps.external_volume_mesos_app()
    app_def["container"]["volumes"][0]["external"]["name"] = volume_name
    app_id = app_def['id']

    # Tested with root marathon since MoM doesn't have
    # --enable_features external_volumes option activated.
    # First deployment should create the volume since it has a unique name
    try:
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

        # Create the app: the volume should be successfully created
        common.assert_app_tasks_running(client, app_def)
        common.assert_app_tasks_healthy(client, app_def)

        # Scale down to 0
        client.stop_app(app_id)
        shakedown.deployment_wait()

        # Scale up again: the volume should be successfully reused
        client.scale_app(app_id, 1)
        shakedown.deployment_wait()

        common.assert_app_tasks_running(client, app_def)
        common.assert_app_tasks_healthy(client, app_def)

        # Remove the app to be able to remove the volume
        client.remove_app(app_id)
        shakedown.deployment_wait()
    except Exception as e:
        print('Fail to test external volumes: {}'.format(e))
        raise
    finally:
        # Clean up after the test: external volumes are not destroyed by marathon or dcos
        # and have to be cleaned manually.
        cmd = 'sudo /opt/mesosphere/bin/dvdcli remove --volumedriver=rexray --volumename={}'.format(volume_name)
        removed = False
        for agent in shakedown.get_private_agents():
            status, output = shakedown.run_command_on_agent(agent, cmd)  # NOQA
            if status:
                removed = True
            else:
                # Report a failure only for agents where the command really failed.
                print('DEBUG: Failed to remove external volume with name={} on agent={}: {}'.format(
                    volume_name, agent, output))

        # Note: Removing the volume might fail sometimes because EC2 takes some time (~10min) to recognize that
        # the volume is not in use anymore hence preventing its removal. This is a known pitfall: we log the error
        # and the volume should be cleaned up manually later.
        if not removed:
            print('WARNING: Failed to remove external volume with name={}'.format(volume_name))
def test_private_repository_docker_app():
    """Distribute docker credentials to the private agents, deploy the private
    docker app and assert that its tasks are running.

    Reads `DOCKER_HUB_USERNAME` and `DOCKER_HUB_PASSWORD` from the environment
    (KeyError if either is missing).
    """
    docker_user = os.environ['DOCKER_HUB_USERNAME']
    docker_password = os.environ['DOCKER_HUB_PASSWORD']
    private_agents = shakedown.get_private_agents()

    common.create_docker_credentials_file(docker_user, docker_password)
    common.copy_docker_credentials_file(private_agents)

    private_app = apps.private_docker_app()
    client = marathon.create_client()
    client.add_app(private_app)
    shakedown.deployment_wait()

    common.assert_app_tasks_running(client, private_app)
def get_node_principals():
    """Get a list of zookeeper principals for the agent nodes in the cluster"""
    # TODO(elezar) we need to infer the region too
    domain = "us-west-2.compute.internal"

    # Each agent IP a.b.c.d becomes the instance name "ip-a-b-c-d".
    return [
        "zookeeper/{instance}.{domain}@{realm}".format(
            instance="ip-{dashed_ip}".format(dashed_ip=ip.replace(".", "-")),
            domain=domain,
            realm=sdk_auth.REALM)
        for ip in shakedown.get_private_agents()
    ]
def archive_sandboxes():
    """Generator that, after its single yield, tarballs /var/lib/mesos/slave
    on each private agent and copies the archive from that agent.

    Presumably used as a pytest fixture (the decorator is not visible here).
    """
    # Nothing to setup
    yield

    print('>>> Archiving Mesos sandboxes')
    # For every agent: tarball the sandboxes, then download the archive if that worked
    for agent in shakedown.get_private_agents():
        archive_name = 'sandbox_{}.tar.gz'.format(agent.replace(".", "_"))
        tar_cmd = 'sudo tar --exclude=provisioner -zcf {} /var/lib/mesos/slave'.format(
            archive_name)
        succeeded, output = shakedown.run_command_on_agent(agent, tar_cmd)  # NOQA

        if not succeeded:
            print(
                'DEBUG: Failed to tarball the sandbox from the agent={}, output={}'
                .format(agent, output))
            continue

        shakedown.copy_file_from_agent(agent, archive_name)
def test_cluster():
    """Install with a CLUSTER placement on a single agent's hostname and check
    the resulting hello/world counts via `ensure_count_per_agent`.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Use the last agent in the returned list as the cluster target.
    target_agent = shakedown.get_private_agents().pop()
    hello_options = {
        "count": num_private_agents,
        "placement": "hostname:CLUSTER:{}".format(target_agent),
    }
    options = {
        "service": {"spec_file": "examples/marathon_constraint.yml"},
        "hello": hello_options,
        "world": {"count": 0},
    }

    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        num_private_agents,
        additional_options=options)
    ensure_count_per_agent(hello_count=num_private_agents, world_count=0)
def test_private_repository_docker_app():
    """Distribute docker credentials to the private agents, deploy the private
    docker container app and assert that its tasks are running.

    Raises AssertionError when `DOCKER_HUB_USERNAME` or `DOCKER_HUB_PASSWORD`
    is not set in the environment.
    """
    # Create and copy docker credentials to all private agents
    env = os.environ
    assert 'DOCKER_HUB_USERNAME' in env, "Couldn't find docker hub username. $DOCKER_HUB_USERNAME is not set"
    assert 'DOCKER_HUB_PASSWORD' in env, "Couldn't find docker hub password. $DOCKER_HUB_PASSWORD is not set"

    docker_user = env['DOCKER_HUB_USERNAME']
    docker_password = env['DOCKER_HUB_PASSWORD']
    private_agents = shakedown.get_private_agents()

    common.create_docker_credentials_file(docker_user, docker_password)
    common.copy_docker_credentials_file(private_agents)

    client = marathon.create_client()
    private_app = common.private_docker_container_app()
    client.add_app(private_app)
    shakedown.deployment_wait()

    common.assert_app_tasks_running(client, private_app)
def test_cluster():
    """Install with a CLUSTER placement on a single agent's hostname and check
    the resulting hello/world counts via `ensure_count_per_agent`.
    """
    sdk_install.uninstall(PACKAGE_NAME)

    # Use the last agent in the returned list as the cluster target.
    target_agent = shakedown.get_private_agents().pop()
    hello_options = {
        "count": num_private_agents,
        "placement": "hostname:CLUSTER:{}".format(target_agent),
    }
    options = {
        "service": {"spec_file": "examples/marathon_constraint.yml"},
        "hello": hello_options,
        "world": {"count": 0},
    }

    sdk_install.install(PACKAGE_NAME, num_private_agents, additional_options=options)
    ensure_count_per_agent(hello_count=num_private_agents, world_count=0)
def cluster_info(mom_name='marathon-user'):
    """Print the DC/OS version and mode, the private agent count, the Marathon
    version and, when the `mom_name` service is available, the version of
    that Marathon-on-Marathon instance.

    :param mom_name: service name of the Marathon-on-Marathon instance
    """
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    print("Agents: {}".format(len(shakedown.get_private_agents())))

    root_about = marathon.create_client().get_about()
    print("Marathon version: {}".format(root_about.get("version")))

    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            # Any failure while querying MoM is reported as "not present".
            print("Marathon MoM not present")
def test_external_volume():
    """Deploy an app with a uniquely named external volume, stop it, scale it
    back up and remove it, asserting the tasks are running and healthy after
    each start.

    Afterwards the volume is removed through the first private agent; a failed
    removal raises AssertionError.
    """
    volume_name = "marathon-si-test-vol-{}".format(uuid.uuid4().hex)
    app_def = common.external_volume_mesos_app(volume_name)
    app_id = app_def['id']

    def assert_running_and_healthy(marathon_client):
        common.assert_app_tasks_running(marathon_client, app_def)
        common.assert_app_tasks_healthy(marathon_client, app_def)

    # Tested with root marathon since MoM doesn't have
    # --enable_features external_volumes option activated.
    # First deployment should create the volume since it has a unique name
    try:
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

        # Create the app: the volume should be successfully created
        assert_running_and_healthy(client)

        # Scale down to 0
        client.stop_app(app_id)
        shakedown.deployment_wait()

        # Scale up again: the volume should be successfully reused
        client.scale_app(app_id, 1)
        shakedown.deployment_wait()

        assert_running_and_healthy(client)

        # Remove the app to be able to remove the volume
        client.remove_app(app_id)
        shakedown.deployment_wait()
    except Exception as e:
        print('Fail to test external volumes: {}'.format(e))
        raise e
    finally:
        # Clean up after the test: external volumes are not destroyed by marathon or dcos
        # and have to be cleaned manually.
        first_agent = shakedown.get_private_agents()[0]
        remove_cmd = 'sudo /opt/mesosphere/bin/dvdcli remove --volumedriver=rexray --volumename={}'.format(
            volume_name)
        result, output = shakedown.run_command_on_agent(first_agent, remove_cmd)
        assert result, 'Failed to remove external volume with name={}: {}'.format(
            volume_name, output)
def test_cluster():
    """Install with a CLUSTER placement on a single agent's hostname and check
    the resulting hello/world counts via `ensure_count_per_agent`.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Use the last agent in the returned list as the cluster target.
    target_agent = shakedown.get_private_agents().pop()
    hello_options = {
        "count": config.get_num_private_agents(),
        "placement": "[[\"hostname\", \"CLUSTER\", \"{}\"]]".format(target_agent),
    }
    options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": hello_options,
        "world": {"count": 0},
    })

    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        config.get_num_private_agents(),
        additional_options=options)
    ensure_count_per_agent(hello_count=config.get_num_private_agents(), world_count=0)
def test_private_repository_docker_app():
    """Distribute docker credentials to the private agents, deploy the private
    docker app and assert that its tasks are running.

    When `shakedown.ee_version()` reports 'strict', the app's `user` field is
    set and the DC/OS Marathon user ACLs are added before deployment.
    Reads `DOCKER_HUB_USERNAME` and `DOCKER_HUB_PASSWORD` from the environment
    (KeyError if either is missing).
    """
    docker_user = os.environ['DOCKER_HUB_USERNAME']
    docker_password = os.environ['DOCKER_HUB_PASSWORD']
    private_agents = shakedown.get_private_agents()

    common.create_docker_credentials_file(docker_user, docker_password)
    common.copy_docker_credentials_file(private_agents)

    private_app = apps.private_docker_app()
    private_app_id = private_app["id"]

    strict_mode = shakedown.ee_version() == 'strict'
    if strict_mode:
        private_app['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_app(private_app)
    common.deployment_wait(service_id=private_app_id)

    common.assert_app_tasks_running(client, private_app)
def get_private_ip():
    """Return the first private agent reported by shakedown, or None if there are none."""
    return next(iter(shakedown.get_private_agents()), None)
def setup_module(module):
    """Require at least two private agents, then remove jobs."""
    agent_count = len(shakedown.get_private_agents())
    assert agent_count >= 2, "Incorrect Agent count"

    remove_jobs()
def gc_frameworks():
    """Run `rm -rf` on the Mesos frameworks directories of every private agent."""
    cleanup_cmd = "sudo rm -rf /var/lib/mesos/slave/slaves/*/frameworks/*"
    private_agents = shakedown.get_private_agents()
    for agent in private_agents:
        shakedown.run_command(agent, cleanup_cmd)
def get_num_private_agents():
    """Return the number of private agents reported by shakedown."""
    private_agents = shakedown.get_private_agents()
    return len(private_agents)
import json import logging import pytest import sdk_cmd import sdk_install import sdk_marathon import sdk_plan import sdk_tasks import sdk_utils import shakedown from tests import config log = logging.getLogger(__name__) num_private_agents = len(shakedown.get_private_agents()) @pytest.fixture(scope='module', autouse=True) def configure_package(configure_security): try: sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME) yield # let the test session execute finally: sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME) @sdk_utils.dcos_1_9_or_higher @pytest.mark.smoke @pytest.mark.sanity
import pytest
import json
import time
import traceback

import shakedown

import sdk_cmd as cmd
import sdk_install as install
import sdk_plan as plan
import sdk_tasks as tasks
import sdk_marathon as marathon

from tests.config import (PACKAGE_NAME)

# Number of private agents, computed once when the module is imported.
num_private_agents = len(shakedown.get_private_agents())


def setup_module(module):
    """Module-level setup hook: uninstall the package."""
    install.uninstall(PACKAGE_NAME)


def teardown_module(module):
    """Module-level teardown hook: uninstall the package."""
    install.uninstall(PACKAGE_NAME)


@pytest.mark.smoke
@pytest.mark.sanity
def test_rack_not_found():
    options = {
        'service': {
def gc_frameworks():
    '''Reclaims private agent disk space consumed by Mesos but not yet garbage collected'''
    cleanup_cmd = "sudo rm -rf /var/lib/mesos/slave/slaves/*/frameworks/*"
    private_agents = shakedown.get_private_agents()
    for agent in private_agents:
        shakedown.run_command(agent, cleanup_cmd)
def gc_frameworks():
    """Run `rm -rf` on the Mesos frameworks directories of every private agent."""
    cleanup_cmd = "sudo rm -rf /var/lib/mesos/slave/slaves/*/frameworks/*"
    private_agents = shakedown.get_private_agents()
    for agent in private_agents:
        shakedown.run_command(agent, cleanup_cmd)