def test_private_repository_mesos_app():
    """Deploys an app with a private Docker image, using Mesos containerizer."""
    if not common.is_enterprise_cli_package_installed():
        common.install_enterprise_cli_package()

    # Docker Hub credentials come from the environment; they are turned into a
    # pull-config secret so UCR can authenticate against the private registry.
    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']
    secret_name = "pullConfig"
    secret_value = json.dumps(common.create_docker_pull_config_json(username, password))

    app_def = apps.private_ucr_docker_app()

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        app_def['user'] = '******'
        common.add_dcos_marathon_root_user_acls()

    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()
    try:
        client.add_app(app_def)
        shakedown.deployment_wait()
        common.assert_app_tasks_running(client, app_def)
    finally:
        # Always remove the secret, even when deployment or assertions fail.
        common.delete_secret(secret_name)
def cluster_info():
    """Prints basic cluster facts: DC/OS version and mode, private agent count,
    and the installed Metronome version."""
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))

    agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(agents)))

    # Fix: the value printed here comes from metronome_version(), but the label
    # previously said "Marathon version", which was misleading in the output.
    about = metronome_version()
    print("Metronome version: {}".format(about))
def _upgrade_or_downgrade(from_package_name, to_package_name, to_package_version,
                          service_name, running_task_count, additional_options,
                          timeout_seconds):
    """Moves a service from one package (version) to another.

    Uses the marathon uninstall/reinstall flow on older DC/OS, on open DC/OS, or
    when the package name changes; otherwise uses the CLI `update start` flow.
    Finally verifies that every task of the service was restarted.
    """
    task_ids = tasks.get_task_ids(service_name, '')

    # The CLI update flow only exists on EE 1.10+ and cannot switch packages.
    marathon_flow = (shakedown.dcos_version_less_than("1.10")
                     or shakedown.ee_version() is None
                     or from_package_name != to_package_name)

    if marathon_flow:
        log.info('Using marathon upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        marathon.destroy_app(service_name)
        install.install(to_package_name,
                        running_task_count,
                        service_name=service_name,
                        additional_options=additional_options,
                        timeout_seconds=timeout_seconds,
                        package_version=to_package_version)
    else:
        log.info('Using CLI upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as options_file:
                options_file.write(json.dumps(additional_options).encode('utf-8'))
                options_file.flush()  # ensure json content is available for the CLI
                cmd.run_cli('{} --name={} update start --package-version={} --options={}'.format(
                    to_package_name, service_name, to_package_version, options_file.name))
        else:
            cmd.run_cli('{} --name={} update start --package-version={}'.format(
                to_package_name, service_name, to_package_version))

    log.info('Checking that all tasks have restarted')
    tasks.check_tasks_updated(service_name, '', task_ids)
async def test_event_channel_for_pods(sse_events):
    """Tests the Marathon event channel specific to pod events."""
    await common.assert_event('event_stream_attached', sse_events)

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Creating the pod must emit both a creation and a deployment-step event.
    await common.assert_event('pod_created_event', sse_events)
    await common.assert_event('deployment_step_success', sse_events)

    # Scaling the pod up must emit an update event.
    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)
    await common.assert_event('pod_updated_event', sse_events)
def test_private_repository_mesos_app():
    """Deploys an app with a private Docker image, using Mesos containerizer."""
    if not common.is_enterprise_cli_package_installed():
        common.install_enterprise_cli_package()

    # Build a pull-config secret from Docker Hub credentials supplied via env vars.
    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']
    secret_name = "pullConfig"
    secret_value = json.dumps(common.create_docker_pull_config_json(username, password))

    app_def = apps.private_ucr_docker_app()

    # Here we're starting an nignx server in a container. In a strict mode however
    # all tasks are started as user `nobody` and `nobody` doesn't have permissions
    # to write to /var/log within the container. To avoid this we override the cmd
    # with a simple `sleep`. This is a hacky workaround but the test is still valid
    # since we're testing `pullConfig` feature.
    if shakedown.ee_version() == 'strict':
        app_def['cmd'] = 'sleep 10000000'

    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()
    try:
        client.add_app(app_def)
        shakedown.deployment_wait()
        common.assert_app_tasks_running(client, app_def)
    finally:
        # Clean the secret up regardless of the outcome.
        common.delete_secret(secret_name)
def events_to_file():
    """Fixture that streams Marathon's /v2/events SSE feed into events.txt on the
    leader node, and records curl's exit code in events.exitcode for later checks."""
    leader_ip = shakedown.marathon_leader_ip()
    print("entering events_to_file fixture")
    shakedown.run_command(leader_ip, 'rm events.txt')

    curl_prefix = '(curl --compressed -H "Cache-Control: no-cache" -H "Accept: text/event-stream" '
    # In strict mode marathon runs in SSL mode on port 8443 and requires authentication
    if shakedown.ee_version() == 'strict':
        stream_cmd = (curl_prefix +
                      '-H "Authorization: token={}" '.format(shakedown.dcos_acs_token()) +
                      '-o events.txt -k https://marathon.mesos:8443/v2/events; echo $? > events.exitcode) &')
    # Otherwise marathon runs on HTTP mode on port 8080
    else:
        stream_cmd = (curl_prefix +
                      '-o events.txt http://marathon.mesos:8080/v2/events; echo $? > events.exitcode) &')
    shakedown.run_command(leader_ip, stream_cmd)

    yield

    # Teardown: stop the background curl and remove its artifacts.
    shakedown.kill_process_on_host(leader_ip, '[c]url')
    shakedown.run_command(leader_ip, 'rm events.txt')
    shakedown.run_command(leader_ip, 'rm events.exitcode')
    print("exiting events_to_file fixture")
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Fix: pick the nginx task by name and read the network info from the
    # running status (as the sibling variant of this test does) instead of
    # blindly indexing the first task/status, which is not guaranteed to be
    # the entry we need.
    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_info['name'])

    # get the port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent_ip, however it is hard to get.. translating from
    # slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Locate the nginx task and its network info from the running status.
    nginx_task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(nginx_task['statuses'])
    assert network_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_info['name'])

    # get the port on the host
    host_port = nginx_task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent_ip, however it is hard to get.. translating from
    # slave_id
    agent_ip = common.agent_hostname_by_id(nginx_task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # The nginx task should be attached to the overlay ("dcos") network.
    nginx_task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(nginx_task['statuses'])
    assert network_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_info['name'])

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(container_ip))
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Fix: pick the nginx task by name and read the network info from the
    # running status (as the sibling variant of this test does) instead of
    # indexing the first task/status, which is not guaranteed to be the
    # entry we need.
    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_info['name'])

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    url = "http://{}:80/".format(container_ip)
    common.assert_http_code(url)
def _upgrade_or_downgrade(package_name, to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds, wait_for_deployment):
    """Upgrades (or downgrades) a service to the given package version.

    Uses the marathon destroy/reinstall flow on DC/OS < 1.10 or on open DC/OS;
    otherwise drives the CLI `update start` flow and manually refreshes the
    package CLI. When `wait_for_deployment` is set, verifies whether tasks were
    restarted (only expected on a config change) and waits for the deployment
    plan to complete.
    """
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if sdk_utils.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(package_name,
                            service_name,
                            running_task_count,
                            additional_options=additional_options,
                            package_version=to_package_version,
                            timeout_seconds=timeout_seconds,
                            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as options_file:
                options_file.write(json.dumps(additional_options).encode('utf-8'))
                options_file.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, options_file.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))
        # we must manually upgrade the package CLI because it's not done automatically in this flow
        # (and why should it? that'd imply the package CLI replacing itself via a call to the main CLI...)
        sdk_cmd.run_cli('package install --yes --cli --package-version={} {}'.format(
            to_package_version, package_name))

    if wait_for_deployment:
        updated_config = get_config(package_name, service_name)
        if updated_config == initial_config:
            log.info('No config change detected. Tasks should not be restarted')
            sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)
        else:
            log.info('Checking that all tasks have restarted')
            sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we
        # can hit the expected total task count via ONCE tasks, without actually
        # completing deployment
        log.info("Waiting for package={} service={} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
def _upgrade_or_downgrade(package_name, to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds, wait_for_deployment):
    """Upgrades (or downgrades) a service to the given package version.

    Uses the marathon destroy/reinstall flow on DC/OS < 1.10 or on open DC/OS;
    otherwise drives the CLI `update start` flow. When `wait_for_deployment` is
    set, verifies that all tasks restarted, waits for the deployment plan to
    complete, and (on DC/OS 1.9+) waits for the framework to be suppressed.
    """
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if shakedown.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(package_name,
                            service_name,
                            running_task_count,
                            additional_options=additional_options,
                            package_version=to_package_version,
                            timeout_seconds=timeout_seconds,
                            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))

    if wait_for_deployment:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we
        # can hit the expected total task count via FINISHED tasks, without
        # actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            # Fix: corrected the log-message typo "suppored" -> "supported".
            log.info(
                "Skipping `is_suppressed` check for %s/%s as this is only supported starting in version 1.9",
                package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(lambda: sdk_api.is_suppressed(service_name),
                               noisy=True,
                               timeout_seconds=5 * 60)
def _upgrade_or_downgrade(
        package_name,
        to_package_version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment):
    """Upgrades (or downgrades) a service to the given package version.

    Uses the marathon destroy/reinstall flow on DC/OS < 1.10 or on open DC/OS;
    otherwise drives the CLI `update start` flow. When `wait_for_deployment` is
    set, verifies that all tasks restarted, waits for the deployment plan to
    complete, and (on DC/OS 1.9+) waits for the framework to be suppressed.
    """
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if shakedown.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))

    if wait_for_deployment:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we
        # can hit the expected total task count via FINISHED tasks, without
        # actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            # Fix: corrected the log-message typo "suppored" -> "supported".
            log.info("Skipping `is_suppressed` check for %s/%s as this is only supported starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)
def test_event_channel_for_pods():
    """Tests the Marathon event channel specific to pod events."""
    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    leader_ip = shakedown.marathon_leader_ip()

    # look for created
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        # An empty events.exitcode file means the background curl is still attached.
        status, stdout = shakedown.run_command(leader_ip, 'cat events.exitcode')
        assert str(stdout).strip() == '', "SSE stream disconnected (CURL exit code is {})".format(stdout.strip())
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been produced"
        assert 'pod_created_event' in stdout, "pod_created_event event has not been produced"
        # Fix: corrected the failure-message typo "beed" -> "been".
        assert 'deployment_step_success' in stdout, "deployment_step_success event has not been produced"

    check_deployment_message()

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    # look for updated
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_update_message():
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        # Fix: the failure message previously said "pod_update_event", which is
        # not the event name actually being checked.
        assert 'pod_updated_event' in stdout, 'pod_updated_event event has not been produced'

    check_update_message()
def cluster_info(mom_name='marathon-user'):
    """Prints DC/OS version/mode, agent count, the root Marathon version and,
    when present, the Marathon-on-Marathon (MoM) version."""
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))

    agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(agents)))

    root_about = marathon.create_client().get_about()
    print("Marathon version: {}".format(root_about.get("version")))

    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            # The MoM service was advertised but could not be queried.
            print("Marathon MoM not present")
def test_private_repository_docker_app():
    """Deploys an app with a private Docker image using credentials distributed
    to every private agent."""
    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    # Create the Docker credentials file and copy it to all private agents so
    # the image can be pulled there.
    agents = shakedown.get_private_agents()
    common.create_docker_credentials_file(username, password)
    common.copy_docker_credentials_file(agents)

    app_def = apps.private_docker_app()

    # In strict mode the task user is overridden (value redacted in this source)
    # and the matching Marathon user ACLs are added.
    if shakedown.ee_version() == 'strict':
        app_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    common.assert_app_tasks_running(client, app_def)