def _upgrade_or_downgrade(
        package_name,
        to_package_version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment):
    """Switch a running service to ``to_package_version``, optionally waiting for it to settle.

    Two flows exist:

    * CLI flow: used when DC/OS is not older than 1.10 and
      ``shakedown.ee_version()`` is not ``None``. Issues ``update start``
      through ``sdk_cmd.svc_cli``; a non-empty ``additional_options`` is
      serialized to a temporary JSON file and passed via ``--options``.
    * Marathon flow: used otherwise. Destroys the Marathon app and installs
      the package again at the requested version via ``sdk_install.install``.

    If ``wait_for_deployment`` is truthy, the function afterwards verifies the
    tasks captured before the change were updated, waits for the deployment
    plan to complete, and (on DC/OS 1.9+) waits up to five minutes for
    ``sdk_api.is_suppressed`` to report true.
    """
    # Capture the task ids first so the restart check has a baseline to compare against.
    previous_task_ids = sdk_tasks.get_task_ids(service_name, '')

    cli_flow_available = not (
        shakedown.dcos_version_less_than("1.10") or shakedown.ee_version() is None)

    if cli_flow_available:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        if not additional_options:
            sdk_cmd.svc_cli(
                package_name,
                service_name,
                'update start --package-version={}'.format(to_package_version))
        else:
            with tempfile.NamedTemporaryFile() as options_file:
                serialized_options = json.dumps(additional_options).encode('utf-8')
                options_file.write(serialized_options)
                # Flush so the CLI, which reads the file by name, sees the full JSON.
                options_file.flush()
                # The CLI call must stay inside the `with`: the file is removed on exit.
                sdk_cmd.svc_cli(
                    package_name,
                    service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, options_file.name))
    else:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)

    if not wait_for_deployment:
        return

    log.info('Checking that all tasks have restarted')
    sdk_tasks.check_tasks_updated(service_name, '', previous_task_ids)

    # Plan completion can take a while (default is 15 minutes). With HDFS, for
    # example, the expected total task count can be reached via FINISHED tasks
    # without the deployment actually being complete.
    log.info("Waiting for {}/{} to finish deployment plan...".format(
        package_name, service_name))
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    # The plan is already complete at this point, so only wait up to 5 minutes here.
    if shakedown.dcos_version_less_than("1.9"):
        log.info("Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
                 package_name, service_name)
        return

    log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
    shakedown.wait_for(
        lambda: sdk_api.is_suppressed(service_name),
        noisy=True,
        timeout_seconds=5 * 60)
def _upgrade_or_downgrade(package_name, to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds, wait_for_deployment):
    """Upgrade or downgrade ``service_name`` to ``to_package_version``.

    On DC/OS older than 1.10, or when ``shakedown.ee_version()`` returns
    ``None``, the Marathon app is destroyed and the package reinstalled at the
    target version. Otherwise the service CLI's ``update start`` command is
    used, with any non-empty ``additional_options`` written to a temporary
    JSON file and referenced through ``--options``.

    When ``wait_for_deployment`` is truthy the function then checks that the
    pre-existing tasks were updated, waits for the deployment plan to finish
    and, on DC/OS 1.9 or later, waits up to five minutes for the scheduler to
    report itself suppressed.
    """
    # Baseline for the "all tasks restarted" check further down.
    baseline_task_ids = sdk_tasks.get_task_ids(service_name, '')

    marathon_flow = (shakedown.dcos_version_less_than("1.10")
                     or shakedown.ee_version() is None)
    if marathon_flow:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        install_kwargs = {
            'additional_options': additional_options,
            'package_version': to_package_version,
            'timeout_seconds': timeout_seconds,
            'wait_for_deployment': wait_for_deployment,
        }
        sdk_install.install(package_name, service_name, running_task_count,
                            **install_kwargs)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as tmp_options:
                tmp_options.write(
                    json.dumps(additional_options).encode('utf-8'))
                # Make sure the JSON is on disk before the CLI opens the file by name.
                tmp_options.flush()
                update_cmd = 'update start --package-version={} --options={}'.format(
                    to_package_version, tmp_options.name)
                # Run while the temporary file still exists.
                sdk_cmd.svc_cli(package_name, service_name, update_cmd)
        else:
            update_cmd = 'update start --package-version={}'.format(
                to_package_version)
            sdk_cmd.svc_cli(package_name, service_name, update_cmd)

    if not wait_for_deployment:
        return

    log.info('Checking that all tasks have restarted')
    sdk_tasks.check_tasks_updated(service_name, '', baseline_task_ids)

    # This can take a while (default is 15 minutes): with HDFS, for example,
    # the expected total task count may be hit via FINISHED tasks without the
    # deployment actually having completed.
    log.info("Waiting for {}/{} to finish deployment plan...".format(
        package_name, service_name))
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    # After the plan wait above, a 5 minute budget is enough for this check.
    suppressed_check_supported = not shakedown.dcos_version_less_than("1.9")
    if suppressed_check_supported:
        log.info("Waiting for %s/%s to be suppressed...", package_name,
                 service_name)
        shakedown.wait_for(lambda: sdk_api.is_suppressed(service_name),
                           noisy=True,
                           timeout_seconds=5 * 60)
    else:
        log.info(
            "Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
            package_name, service_name)
def configure_package(configure_security):
    """Set up the foldered service, yield to the tests, then uninstall it.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run from the ``beta-`` package to ``PACKAGE_NAME``. The service is always
    uninstalled again once the generator is resumed or closed.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    try:
        sdk_install.uninstall(FOLDERED_SERVICE_NAME, package_name=PACKAGE_NAME)

        # Both code paths share the same service name, options and 30 minute timeout.
        common_kwargs = {
            'service_name': FOLDERED_SERVICE_NAME,
            'additional_options': {"service": {"name": FOLDERED_SERVICE_NAME}},
            'timeout_seconds': 30 * 60,
        }

        upgrade_supported = not shakedown.dcos_version_less_than("1.9")
        if upgrade_supported:
            sdk_upgrade.test_upgrade(
                "beta-{}".format(PACKAGE_NAME),
                PACKAGE_NAME,
                DEFAULT_TASK_COUNT,
                **common_kwargs)
        else:
            # HDFS upgrade is not supported on 1.8, so just install.
            sdk_install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, **common_kwargs)

        yield  # hand control to the test session
    finally:
        sdk_install.uninstall(FOLDERED_SERVICE_NAME, package_name=PACKAGE_NAME)
def configure_package(configure_security):
    """Set up the foldered service, yield to the tests, then uninstall it.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run. The service is always uninstalled again in the ``finally`` clause.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    # Resolve the name *before* entering the try block. If the lookup raised
    # inside the try, the `finally` clause would reference an unbound local
    # and raise UnboundLocalError, obscuring the real failure.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)

        if shakedown.dcos_version_less_than("1.9"):
            # HDFS upgrade in 1.8 is not supported.
            sdk_install.install(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                additional_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60)
        else:
            sdk_upgrade.test_upgrade(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                additional_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60)

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def _upgrade_or_downgrade(from_package_name, to_package_name,
                          to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds):
    """Move ``service_name`` from one package/version to another and check tasks restarted.

    The Marathon flow (destroy the app, then install the target package) is
    used when DC/OS is older than 1.10, when ``shakedown.ee_version()`` is
    ``None``, or when the source and target package names differ. Otherwise
    the package CLI's ``update start`` command is run, with any non-empty
    ``additional_options`` passed through a temporary JSON file.

    In both cases the function finishes by calling
    ``tasks.check_tasks_updated`` with the task ids captured up front.
    """
    # Captured before any change so the final check can detect restarts.
    original_task_ids = tasks.get_task_ids(service_name, '')

    must_use_marathon = (
        shakedown.dcos_version_less_than("1.10")
        or shakedown.ee_version() is None
        or from_package_name != to_package_name)

    if not must_use_marathon:
        log.info('Using CLI upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        if not additional_options:
            cmd.run_cli(
                '{} --name={} update start --package-version={}'.format(
                    to_package_name, service_name, to_package_version))
        else:
            with tempfile.NamedTemporaryFile() as options_file:
                payload = json.dumps(additional_options).encode('utf-8')
                options_file.write(payload)
                # Flush so the CLI can read the complete JSON from the named file.
                options_file.flush()
                # Must run before the `with` exits and removes the file.
                cmd.run_cli(
                    '{} --name={} update start --package-version={} --options={}'
                    .format(to_package_name, service_name,
                            to_package_version, options_file.name))
    else:
        log.info('Using marathon upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        marathon.destroy_app(service_name)
        install.install(
            to_package_name,
            running_task_count,
            service_name=service_name,
            additional_options=additional_options,
            timeout_seconds=timeout_seconds,
            package_version=to_package_version)

    log.info('Checking that all tasks have restarted')
    tasks.check_tasks_updated(service_name, '', original_task_ids)
def configure_package(configure_security):
    """Set up the foldered service, wait for brokers, yield to the tests, then uninstall.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run. ``test_utils.broker_count_check`` is then called before yielding,
    and the service is always uninstalled again in the ``finally`` clause.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    # Resolve the name *before* entering the try block. If the lookup raised
    # inside the try, the `finally` clause would reference an unbound local
    # and raise UnboundLocalError, obscuring the real failure.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)

        if shakedown.dcos_version_less_than("1.9"):
            # Last beta-kafka release (1.1.25-0.10.1.0-beta) excludes 1.8.
            # Skip upgrade tests with 1.8 and just install.
            sdk_install.install(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_BROKER_COUNT,
                additional_options={"service": {"name": foldered_name}})
        else:
            sdk_upgrade.test_upgrade(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_BROKER_COUNT,
                additional_options={"service": {"name": foldered_name}})

        # wait for brokers to finish registering before starting tests
        test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT,
                                      service_name=foldered_name)

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def configure_package(configure_security):
    """Set up the foldered service, yield to the tests, then uninstall it.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run from the ``beta-`` package to ``config.PACKAGE_NAME``. The service is
    always uninstalled again in the ``finally`` clause.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    try:
        install.uninstall(FOLDERED_SERVICE_NAME,
                          package_name=config.PACKAGE_NAME)

        # Service name and options are identical for both code paths.
        common_kwargs = {
            'service_name': FOLDERED_SERVICE_NAME,
            'additional_options': {"service": {"name": FOLDERED_SERVICE_NAME}},
        }

        upgrade_supported = not shakedown.dcos_version_less_than("1.9")
        if upgrade_supported:
            sdk_upgrade.test_upgrade(
                "beta-{}".format(config.PACKAGE_NAME),
                config.PACKAGE_NAME,
                config.DEFAULT_BROKER_COUNT,
                **common_kwargs)
        else:
            # The last beta-kafka release (1.1.25-0.10.1.0-beta) excludes 1.8,
            # so on 1.8 the upgrade test is skipped in favour of a plain install.
            install.install(
                config.PACKAGE_NAME,
                config.DEFAULT_BROKER_COUNT,
                **common_kwargs)

        yield  # hand control to the test session
    finally:
        install.uninstall(FOLDERED_SERVICE_NAME,
                          package_name=config.PACKAGE_NAME)
def install(
        package_name,
        expected_running_tasks,
        service_name=None,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    """Install a package and, by default, wait for its scheduler to become idle.

    Args:
        package_name: name of the package to install.
        expected_running_tasks: forwarded to ``retried_shakedown_install``.
        service_name: name of the service; falls back to ``package_name``
            when falsy.
        additional_options: options merged via ``get_package_options``.
            ``None`` (the default) is treated as an empty dict.
        package_version: forwarded to ``retried_shakedown_install``.
        timeout_seconds: timeout used for the install and for the deployment
            plan wait.
        wait_for_deployment: when truthy, wait for the deployment plan to
            complete and (on DC/OS 1.9+) for the scheduler to be suppressed.
    """
    # A fresh dict per call: a `{}` default would be one shared object that
    # any downstream mutation could leak into later calls.
    if additional_options is None:
        additional_options = {}

    if not service_name:
        service_name = package_name

    start = time.time()
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        package_version,
        service_name,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    """Install a package and, by default, wait for its scheduler to become idle.

    Args:
        package_name: name of the package to install.
        service_name: name of the service being installed.
        expected_running_tasks: forwarded to ``retried_shakedown_install``.
        additional_options: options merged via ``get_package_options``.
            ``None`` (the default) is treated as an empty dict.
        package_version: forwarded to ``retried_shakedown_install``.
        timeout_seconds: timeout used for the install and for the deployment
            plan wait.
        wait_for_deployment: when truthy, wait for the deployment plan to
            complete and (on DC/OS 1.9+) for the scheduler to be suppressed.
    """
    # A fresh dict per call: a `{}` default would be one shared object that
    # any downstream mutation could leak into later calls.
    if additional_options is None:
        additional_options = {}

    start = time.time()
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        service_name,
        package_version,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
def configure_package(configure_security):
    """Set up the foldered service, yield to the tests, then uninstall it.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run. The service is always uninstalled again in the ``finally`` clause.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME)

        # Positional and keyword arguments are the same for both code paths.
        setup_args = (
            config.PACKAGE_NAME,
            FOLDERED_SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
        )
        setup_kwargs = {
            'additional_options': {"service": {"name": FOLDERED_SERVICE_NAME}},
            'timeout_seconds': 30 * 60,
        }

        upgrade_supported = not shakedown.dcos_version_less_than("1.9")
        if upgrade_supported:
            sdk_upgrade.test_upgrade(*setup_args, **setup_kwargs)
        else:
            # HDFS upgrade is not supported on 1.8, so just install.
            sdk_install.install(*setup_args, **setup_kwargs)

        yield  # hand control to the test session
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME)
def configure_package(configure_security):
    """Set up the foldered service, wait for brokers, yield to the tests, then uninstall.

    Any existing instance is uninstalled first. On DC/OS older than 1.9 the
    package is installed directly; otherwise ``sdk_upgrade.test_upgrade`` is
    run. ``test_utils.broker_count_check`` is called before yielding, and the
    service is always uninstalled again in the ``finally`` clause.

    ``configure_security`` is not used in the body; presumably it is a
    fixture dependency that must run first -- confirm against the test setup.
    """
    try:
        install.uninstall(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME)

        # Positional and keyword arguments are the same for both code paths.
        setup_args = (
            config.PACKAGE_NAME,
            FOLDERED_SERVICE_NAME,
            config.DEFAULT_BROKER_COUNT,
        )
        service_options = {"service": {"name": FOLDERED_SERVICE_NAME}}

        upgrade_supported = not shakedown.dcos_version_less_than("1.9")
        if upgrade_supported:
            sdk_upgrade.test_upgrade(
                *setup_args, additional_options=service_options)
        else:
            # The last beta-kafka release (1.1.25-0.10.1.0-beta) excludes 1.8,
            # so on 1.8 the upgrade test is skipped in favour of a plain install.
            install.install(*setup_args, additional_options=service_options)

        # Let the brokers finish registering before any test starts.
        test_utils.broker_count_check(
            config.DEFAULT_BROKER_COUNT,
            service_name=FOLDERED_SERVICE_NAME)

        yield  # hand control to the test session
    finally:
        install.uninstall(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME)
def dcos_version_less_than(version):
    """Delegate to ``shakedown.dcos_version_less_than`` and return its result unchanged."""
    is_older = shakedown.dcos_version_less_than(version)
    return is_older
def uninstall(service_name, package_name=None, role=None, principal=None, zk=None):
    """Uninstall a service, using the janitor on DC/OS older than 1.10.

    Args:
        service_name: name of the service to remove.
        package_name: package to uninstall; defaults to ``service_name``.
        role: janitor ``-r`` value; defaults to ``<deslashed name>-role``.
        principal: janitor ``-p`` value; defaults to
            ``<service_name>-principal`` (the service name as given, slashes
            included).
        zk: janitor ``-z`` value; defaults to ``dcos-service-<deslashed name>``.

    ``role``, ``principal`` and ``zk`` are only used on the pre-1.10 path.
    ``DCOSException`` and ``ValueError`` raised while uninstalling are logged
    and swallowed on both paths.
    """
    start = time.time()

    if package_name is None:
        package_name = service_name

    handled_errors = (dcos.errors.DCOSException, ValueError)

    if not shakedown.dcos_version_less_than("1.10"):
        # DC/OS 1.10+: uninstall, then wait until Marathon no longer knows the app.
        sdk_utils.out('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)

            # Normalize to exactly one leading slash; the name may already have one.
            marathon_app_id = '/' + service_name.lstrip('/')
            sdk_utils.out(
                'Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(600, marathon_app_id)

            def marathon_dropped_service():
                """Return True once Marathon lists no app with the target id."""
                marathon_client = shakedown.marathon.create_client()
                app_ids = [app['id'] for app in marathon_client.get_apps()]
                sdk_utils.out('Marathon apps: {}'.format(app_ids))
                match_count = sum(
                    1 for app_id in app_ids if app_id == marathon_app_id)
                if match_count > 1:
                    sdk_utils.out('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return match_count == 0

            sdk_utils.out(
                'Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service)
        except handled_errors as err:
            sdk_utils.out(
                'Got exception when uninstalling package: {}'.format(err))
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS older than 1.10: uninstall, then clean up with the janitor container.
    sdk_utils.out('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)
    except handled_errors as err:
        sdk_utils.out('Got exception when uninstalling package, ' +
                      'continuing with janitor anyway: {}'.format(err))

    janitor_start = time.time()

    # Drop the leading slash and turn the remaining slashes into double underscores.
    deslashed_service_name = service_name.lstrip('/').replace('/', '__')
    if role is None:
        role = deslashed_service_name + '-role'
    if principal is None:
        principal = service_name + '-principal'
    if zk is None:
        zk = 'dcos-service-' + deslashed_service_name

    janitor_cmd = ('docker run mesosphere/janitor /janitor.py '
                   '-r {role} -p {principal} -z {zk} --auth_token={auth}')
    auth_token = shakedown.run_dcos_command(
        'config show core.dcos_acs_token')[0].strip()
    shakedown.run_command_on_master(
        janitor_cmd.format(
            role=role, principal=principal, zk=zk, auth=auth_token))

    finish = time.time()

    package_duration = shakedown.pretty_duration(janitor_start - start)
    janitor_duration = shakedown.pretty_duration(finish - janitor_start)
    total_duration = shakedown.pretty_duration(finish - start)
    sdk_utils.out(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            package_duration, janitor_duration, total_duration))
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    """Uninstall a service, using the janitor on DC/OS older than 1.10.

    Args:
        package_name: package to uninstall.
        service_name: name of the service to remove.
        role: janitor ``-r`` value; defaults to ``<deslashed name>-role``.
        service_account: janitor ``-p`` value; defaults to
            ``<service_name>-principal`` (the service name as given, slashes
            included).
        zk: janitor ``-z`` value; defaults to ``dcos-service-<deslashed name>``.

    ``role``, ``service_account`` and ``zk`` are only used on the pre-1.10
    path. ``DCOSException`` and ``ValueError`` raised while uninstalling are
    logged; they are re-raised only when their text contains ``'marathon'``
    (the log message indicates this is meant to trigger a caller's retry).
    """
    start = time.time()

    handled_errors = (dcos.errors.DCOSException, ValueError)

    if not shakedown.dcos_version_less_than("1.10"):
        # DC/OS 1.10+: uninstall, then wait until Marathon no longer knows the app.
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)

            # Normalize to exactly one leading slash; the name may already have one.
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info('Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            def marathon_dropped_service():
                """Return True once Marathon lists no app with the target id."""
                marathon_client = shakedown.marathon.create_client()
                app_ids = [app['id'] for app in marathon_client.get_apps()]
                log.info('Marathon apps: {}'.format(app_ids))
                match_count = sum(
                    1 for app_id in app_ids if app_id == marathon_app_id)
                if match_count > 1:
                    log.info('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return match_count == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)
        except handled_errors as err:
            log.info(
                'Got exception when uninstalling package: {}'.format(err))
            if 'marathon' in str(err):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS older than 1.10: uninstall, then clean up with the janitor container.
    log.info('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)
    except handled_errors as err:
        log.info('Got exception when uninstalling package, ' +
                 'continuing with janitor anyway: {}'.format(err))
        if 'marathon' in str(err):
            log.info('Detected a probable marathon flake. Raising so retry will trigger.')
            raise

    janitor_start = time.time()

    # Drop the leading slash and turn the remaining slashes into double underscores.
    deslashed_service_name = service_name.lstrip('/').replace('/', '__')
    if role is None:
        role = deslashed_service_name + '-role'
    if service_account is None:
        service_account = service_name + '-principal'
    if zk is None:
        zk = 'dcos-service-' + deslashed_service_name

    janitor_cmd = ('docker run mesosphere/janitor /janitor.py '
                   '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
    auth_token = sdk_cmd.run_cli(
        'config show core.dcos_acs_token', print_output=False).strip()
    shakedown.run_command_on_master(
        janitor_cmd.format(
            role=role,
            service_account=service_account,
            zk=zk,
            auth=auth_token))

    finish = time.time()

    package_duration = shakedown.pretty_duration(janitor_start - start)
    janitor_duration = shakedown.pretty_duration(finish - janitor_start)
    total_duration = shakedown.pretty_duration(finish - start)
    log.info(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            package_duration, janitor_duration, total_duration))
def get_foldered_name(service_name):
    """Return the test-integration name for ``service_name``.

    DC/OS 1.9 and earlier don't support "foldered" service names (aka
    Marathon group names), so a flat ``test_integration_`` prefix is used
    there; newer versions get the ``/test/integration/`` folder prefix.
    """
    folders_unsupported = shakedown.dcos_version_less_than("1.10")
    prefix = "test_integration_" if folders_unsupported else "/test/integration/"
    return prefix + service_name