def partition_agent(agent_host: str):
    """Partition an agent from the cluster by rewriting its iptables rules.

    Runs a single `&&`-chained command over SSH on `agent_host` that saves
    the current rules to `iptables.backup`, flushes the INPUT chain, and then
    installs rules that keep SSH and ping working while rejecting mesos
    traffic (TCP source port 5051) and all other input.

    Raises AssertionError if the remote command returns a non-zero code.
    """
    steps = (
        # Nice to have for any debugging.
        "hostname",
        "echo Saving current rules...",
        "sudo iptables -L > /dev/null",
        "sudo iptables-save > iptables.backup",
        "echo Flushing rules...",
        "sudo iptables -F INPUT",
        "echo Allowing all traffic...",
        "sudo iptables --policy INPUT ACCEPT",
        "sudo iptables --policy OUTPUT ACCEPT",
        "sudo iptables --policy FORWARD ACCEPT",
        "echo Cutting off mesos...",
        "sudo iptables -I INPUT -p tcp --dport 22 -j ACCEPT",  # allow SSH
        "sudo iptables -I INPUT -p icmp -j ACCEPT",  # allow ping
        "sudo iptables -I OUTPUT -p tcp --sport 5051 -j REJECT",  # disallow mesos
        "sudo iptables -A INPUT -j REJECT",  # disallow all other input
    )
    remote_command = " && ".join(steps)
    exit_code, _, _ = sdk_cmd.agent_ssh(agent_host, remote_command)
    assert exit_code == 0, "Failed to partition agent"
def partition_agent(agent_host: str) -> None:
    """Cut `agent_host` off from mesos while keeping it reachable via SSH/ping.

    The remote script is built from three phases (backup, reset, isolate) and
    executed as one `&&`-joined command, so the first failing step aborts the
    rest. The previous rules are written to `iptables.backup` on the agent.

    Raises AssertionError if the remote command returns a non-zero code.
    """
    backup_phase = [
        # Nice to have for any debugging.
        "hostname",
        "echo Saving current rules...",
        "sudo iptables -L > /dev/null",
        "sudo iptables-save > iptables.backup",
    ]
    reset_phase = [
        "echo Flushing rules...",
        "sudo iptables -F INPUT",
        "echo Allowing all traffic...",
        "sudo iptables --policy INPUT ACCEPT",
        "sudo iptables --policy OUTPUT ACCEPT",
        "sudo iptables --policy FORWARD ACCEPT",
    ]
    isolate_phase = [
        "echo Cutting off mesos...",
        # allow SSH
        "sudo iptables -I INPUT -p tcp --dport 22 -j ACCEPT",
        # allow ping
        "sudo iptables -I INPUT -p icmp -j ACCEPT",
        # disallow mesos
        "sudo iptables -I OUTPUT -p tcp --sport 5051 -j REJECT",
        # disallow all other input
        "sudo iptables -A INPUT -j REJECT",
    ]

    script = " && ".join(backup_phase + reset_phase + isolate_phase)
    status, _, _ = sdk_cmd.agent_ssh(agent_host, script)
    assert status == 0, "Failed to partition agent"
def reconnect_agent(agent_host: str):
    """Restore the iptables rules previously saved to `iptables.backup`.

    Runs the restore and the removal of the backup file as a single
    `&&`-chained SSH command on `agent_host`.

    Raises AssertionError if the remote command returns a non-zero code.
    """
    # restore prior rules:
    restore_steps = (
        # Nice to have for any debugging.
        "hostname",
        "echo Restoring previous rules...",
        "sudo iptables-restore < iptables.backup",
        "sudo rm -f iptables.backup",
    )
    exit_code, _, _ = sdk_cmd.agent_ssh(agent_host, " && ".join(restore_steps))
    assert exit_code == 0, "Failed to reconnect agent"
def reconnect_agent(agent_host: str) -> None:
    """Undo a partition by re-applying the rules stored in `iptables.backup`.

    The backup file is deleted on the agent once the restore step succeeds
    (the steps are chained with `&&`).

    Raises AssertionError if the remote command returns a non-zero code.
    """
    # restore prior rules:
    script_lines = []
    # Nice to have for any debugging.
    script_lines.append("hostname")
    script_lines.append("echo Restoring previous rules...")
    script_lines.append("sudo iptables-restore < iptables.backup")
    script_lines.append("sudo rm -f iptables.backup")

    script = " && ".join(script_lines)
    status, _, _ = sdk_cmd.agent_ssh(agent_host, script)
    assert status == 0, "Failed to reconnect agent"
def check_content():
    """Assert that the test file inside the persistent volume is intact.

    Reads `test` from the volume directory named by the enclosing scope's
    `pv_name` on `host` and checks both that the remote command succeeded and
    that the file holds the expected marker text.

    Raises AssertionError if the SSH command fails or the content differs.
    """
    ok, pv_content = sdk_cmd.agent_ssh(
        host,
        "cat /var/lib/mesos/slave/volumes/roles/slave_public/{}/test".format(pv_name),
    )
    # Previously the status was bound but never inspected; surface command
    # failures explicitly instead of as an opaque content mismatch.
    assert ok, "Failed to read persistent volume content"
    assert pv_content.strip() == "this is a test"
def test_marathon_volume_collission():
    """Check that installing/uninstalling the service keeps Marathon volumes.

    Raises AssertionError if the volume cannot be listed or its content
    changes at any checkpoint.
    """
    # This test validates that a service registered in a sub-role of
    # slave_public will _not_ unreserve Marathon volumes RESERVED
    # in the `slave_public` role.

    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > persistent-volume/test && sleep 10000",
        "container": {
            "type": "MESOS",
            "volumes": [
                {
                    "persistent": {"type": "root", "size": 500, "constraints": []},
                    "mode": "RW",
                    "containerPath": "persistent-volume",
                }
            ],
        },
    }

    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        ok, pv_name = sdk_cmd.agent_ssh(host, "ls /var/lib/mesos/slave/volumes/roles/slave_public")
        assert ok
        pv_name = pv_name.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            ok, pv_content = sdk_cmd.agent_ssh(
                host,
                "cat /var/lib/mesos/slave/volumes/roles/slave_public/{}/test".format(pv_name),
            )
            # Check the command status as well as the content, so that a
            # failed read is reported as such.
            assert ok, "Failed to read persistent volume content"
            assert pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 0
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Install Hello World
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 1
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Make sure the persistent volume is still there
        check_content()

    finally:
        try:
            # Reinstall hello world
            sdk_install.install(
                config.PACKAGE_NAME,
                config.SERVICE_NAME,
                config.DEFAULT_TASK_COUNT,
                additional_options=pre_reserved_options,
            )
        finally:
            # Always remove the Marathon app, even when the reinstall above
            # raises; otherwise the app would be leaked.
            sdk_marathon.destroy_app(marathon_app_name)
def fn():
    """Schedule a halt of the agent in one minute and report success.

    Uses `agent_host` from the enclosing scope. Logs the return code and
    stdout of the remote command.

    Returns True when the remote `shutdown` command returned 0.
    """
    exit_code, output, _ = sdk_cmd.agent_ssh(agent_host, "sudo shutdown -h +1")
    message = 'Shutdown agent {}: rc={}, stdout="{}"'.format(agent_host, exit_code, output)
    log.info(message)
    return exit_code == 0
def test_marathon_volume_collision():
    """Check that installing/uninstalling the service keeps Marathon volumes.

    Raises AssertionError if the volume path cannot be resolved or its
    content cannot be read back at any checkpoint.
    """
    # This test validates that a service registered in a sub-role of
    # slave_public will _not_ unreserve Marathon volumes RESERVED
    # in the `slave_public` role.

    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    volume_name = "persistent-volume"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(volume_name),
        "container": {
            "type": "MESOS",
            "volumes": [{
                "persistent": {
                    "type": "root",
                    "size": 500,
                    "constraints": []
                },
                "mode": "RW",
                "containerPath": volume_name,
            }],
        },
    }

    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        # Should get e.g.: "/var/lib/mesos/slave/volumes/roles/slave_public/persistent-test#persistent-volume#76e7bb6d-64fa-11e8-abc5-8e679b292d5e"
        rc, pv_path, _ = sdk_cmd.agent_ssh(
            host,
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
                marathon_app_name, volume_name),
        )

        if rc != 0:
            log.error("Could not get slave_public roles. return-code: '%s'\n", rc)
        assert rc == 0

        pv_path = pv_path.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            rc, pv_content, _ = sdk_cmd.agent_ssh(
                host, "cat {}/test".format(pv_path))
            assert rc == 0 and pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 0
        sdk_marathon.update_app(app_config)

        # Install Hello World
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            PRERESERVED_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 1
        sdk_marathon.update_app(app_config)

        # Make sure the persistent volume is still there
        check_content()

    finally:
        try:
            # Reinstall hello world
            sdk_install.install(
                config.PACKAGE_NAME,
                config.SERVICE_NAME,
                PRERESERVED_TASK_COUNT,
                additional_options=pre_reserved_options,
            )
        finally:
            # Always remove the Marathon app, even when the reinstall above
            # raises; otherwise the app would be leaked.
            sdk_marathon.destroy_app(marathon_app_name)
def check_content():
    """Assert that `<pv_path>/test` on `host` holds the expected marker text.

    `host` and `pv_path` come from the enclosing scope.

    Raises AssertionError if the remote `cat` returns non-zero or the
    stripped output is not the marker text.
    """
    read_command = "cat {}/test".format(pv_path)
    exit_code, file_text, _ = sdk_cmd.agent_ssh(host, read_command)
    # De Morgan form of "succeeded and content matches"; still short-circuits
    # on a non-zero exit code before touching the output.
    assert not (exit_code != 0 or file_text.strip() != "this is a test")
def fn() -> bool:
    """Ask the agent to halt in one minute; return whether the request worked.

    Uses `agent_host` from the enclosing scope and logs the command's return
    code and stdout.
    """
    result = sdk_cmd.agent_ssh(agent_host, "sudo shutdown -h +1")
    status, out, _ = result
    log.info(
        'Shutdown agent {}: rc={}, stdout="{}"'.format(agent_host, status, out)
    )
    succeeded = status == 0
    return succeeded
def test_backup_and_restore_to_s3_compatible_storage() -> None:
    """Run the S3 backup/restore plans against a minio endpoint.

    Installs minio and marathon-lb, points the AWS credential environment
    variables at the minio credentials from `config`, creates the bucket with
    the `aws` CLI and runs the `backup-s3` / `restore-s3` plans. On exit both
    packages are uninstalled and the caller's AWS environment variables are
    put back exactly as they were (including being unset).

    Raises AssertionError if AWS_ACCESS_KEY_ID is not set.
    """
    # Capture the caller's credentials *before* the try block so the cleanup
    # below can never hit an unbound name, whatever fails first.
    saved_env = {
        "AWS_ACCESS_KEY_ID": os.getenv("AWS_ACCESS_KEY_ID"),
        "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
    }
    # Fail fast, before anything is installed on the cluster.
    assert saved_env[
        "AWS_ACCESS_KEY_ID"
    ], 'AWS credentials are required for this test. Disable test with e.g. TEST_TYPES="sanity and not aws"'

    try:
        sdk_install.install(
            "minio",
            "minio",
            expected_running_tasks=0,
            package_version="0.0.13-RELEASE.2018-10-06T00-15-16Z",
            wait_for_deployment=False,
        )

        options = ""
        if sdk_utils.is_strict_mode():
            sdk_security.create_service_account(
                service_account_name="marathon-lb-sa",
                service_account_secret="marathon-lb/service-account-secret",
            )
            sdk_cmd.run_cli(
                "security org users grant marathon-lb-sa dcos:service:marathon:marathon:services:/ read"
            )
            sdk_cmd.run_cli(
                'security org users grant marathon-lb-sa dcos:service:marathon:marathon:admin:events read --description "Allows access to Marathon events"'
            )
            options = {
                "marathon-lb": {
                    "secret_name": "marathon-lb/service-account-secret",
                    "marathon-uri": "https://marathon.mesos:8443",
                }
            }

        sdk_install.install(
            "marathon-lb",
            "marathon-lb",
            expected_running_tasks=0,
            additional_options=options,
            package_version="1.14.0",
            wait_for_deployment=False,
        )

        host = sdk_marathon.get_scheduler_host("marathon-lb")
        _, public_node_ip, _ = sdk_cmd.agent_ssh(host, "curl -s ifconfig.co")
        # Strip surrounding whitespace/newlines so they cannot end up in the URL.
        minio_endpoint_url = "http://" + public_node_ip.strip() + ":9000"

        os.environ["AWS_ACCESS_KEY_ID"] = config.MINIO_AWS_ACCESS_KEY_ID
        os.environ["AWS_SECRET_ACCESS_KEY"] = config.MINIO_AWS_SECRET_ACCESS_KEY

        # NOTE(review): the exit status of bucket creation is not checked;
        # left as-is because the bucket may already exist.
        subprocess.run(
            [
                "aws",
                "s3",
                "mb",
                "s3://" + config.MINIO_BUCKET_NAME,
                "--endpoint",
                minio_endpoint_url,
            ]
        )

        plan_parameters = {
            "AWS_ACCESS_KEY_ID": os.getenv("AWS_ACCESS_KEY_ID"),
            "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "AWS_REGION": os.getenv("AWS_REGION", "us-west-2"),
            "S3_BUCKET_NAME": config.MINIO_BUCKET_NAME,
            "SNAPSHOT_NAME": str(uuid.uuid1()),
            "CASSANDRA_KEYSPACES": '"testspace1 testspace2"',
            "S3_ENDPOINT_URL": minio_endpoint_url,
        }

        config.run_backup_and_restore(
            config.get_foldered_service_name(),
            "backup-s3",
            "restore-s3",
            plan_parameters,
            config.get_foldered_node_address(),
        )
    finally:
        try:
            sdk_install.uninstall("minio", "minio")
            sdk_install.uninstall("marathon-lb", "marathon-lb")
        finally:
            # Restore the environment even if an uninstall fails. os.environ
            # only accepts strings, so a variable that was originally unset
            # must be removed rather than assigned None.
            for env_name, env_value in saved_env.items():
                if env_value is None:
                    os.environ.pop(env_name, None)
                else:
                    os.environ[env_name] = env_value
def check_content():
    """Verify the marker file in the persistent volume can be read back.

    Reads `test` under the enclosing scope's `pv_path` on `host`.

    Raises AssertionError unless the remote command returns 0 and its
    stripped output equals the marker text.
    """
    status, output, _ = sdk_cmd.agent_ssh(host, "cat {}/test".format(pv_path))
    content_intact = status == 0 and output.strip() == "this is a test"
    assert content_intact
def test_marathon_volume_collision():
    """Check that installing/uninstalling the service keeps Marathon volumes.

    Raises AssertionError if the volume path cannot be resolved or its
    content cannot be read back at any checkpoint.
    """
    # This test validates that a service registered in a sub-role of
    # slave_public will _not_ unreserve Marathon volumes RESERVED
    # in the `slave_public` role.

    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    volume_name = "persistent-volume"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(volume_name),
        "container": {
            "type": "MESOS",
            "volumes": [
                {
                    "persistent": {"type": "root", "size": 500, "constraints": []},
                    "mode": "RW",
                    "containerPath": volume_name,
                }
            ],
        },
    }

    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        # Should get e.g.: "/var/lib/mesos/slave/volumes/roles/slave_public/persistent-test#persistent-volume#76e7bb6d-64fa-11e8-abc5-8e679b292d5e"
        rc, pv_path, _ = sdk_cmd.agent_ssh(
            host,
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
                marathon_app_name, volume_name
            ),
        )

        if rc != 0:
            log.error("Could not get slave_public roles. return-code: '%s'\n", rc)
        assert rc == 0

        pv_path = pv_path.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            rc, pv_content, _ = sdk_cmd.agent_ssh(host, "cat {}/test".format(pv_path))
            assert rc == 0 and pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 0
        sdk_marathon.update_app(app_config)

        # Install Hello World
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 1
        sdk_marathon.update_app(app_config)

        # Make sure the persistent volume is still there
        check_content()

    finally:
        try:
            # Reinstall hello world
            sdk_install.install(
                config.PACKAGE_NAME,
                config.SERVICE_NAME,
                config.DEFAULT_TASK_COUNT,
                additional_options=pre_reserved_options,
            )
        finally:
            # Always remove the Marathon app, even when the reinstall above
            # raises; otherwise the app would be leaked.
            sdk_marathon.destroy_app(marathon_app_name)
def install_krb_workstation():
    """Install `krb5-workstation` via yum on every agent, then yield once.

    The result of each SSH command is not inspected. Written as a generator
    with a single bare `yield`.
    """
    install_command = "sudo yum install krb5-workstation -y"
    # Generator expression keeps the per-agent hostname lookup lazy, as in a
    # plain loop over the agents.
    agent_hostnames = (entry["hostname"] for entry in sdk_agents.get_agents())
    for agent_hostname in agent_hostnames:
        sdk_cmd.agent_ssh(agent_hostname, install_command)

    yield
def check_content():
    """Assert that `<pv_path>/test` on `host` holds the expected marker text.

    `host` and `pv_path` come from the enclosing scope.

    Raises AssertionError if the SSH command fails or the content differs.
    """
    ok, pv_content = sdk_cmd.agent_ssh(host, "cat {}/test".format(pv_path))
    # Previously the status was bound but never inspected; surface command
    # failures explicitly instead of as an opaque content mismatch.
    assert ok, "Failed to read persistent volume content"
    assert pv_content.strip() == "this is a test"