Beispiel #1
0
def soak_upgrade_downgrade(
        package_name,
        service_name,
        running_task_count,
        additional_options=None,
        timeout_seconds=25*60,
        wait_for_deployment=True):
    """Upgrade a service to the 'stub-universe' version, then downgrade it again.

    The package CLI is installed first. The service is then moved to the
    'stub-universe' package version, and afterwards to whatever version
    `_get_pkg_version(package_name)` reports.

    :param package_name: Name of the package to upgrade/downgrade.
    :param service_name: Name of the running service instance.
    :param running_task_count: Forwarded unchanged to `_upgrade_or_downgrade`.
    :param additional_options: Optional dict of package options; defaults to an empty dict.
    :param timeout_seconds: Forwarded unchanged to `_upgrade_or_downgrade`.
    :param wait_for_deployment: Forwarded unchanged to `_upgrade_or_downgrade`.
    """
    # Use a fresh dict per call: a `{}` default would be a single object shared by
    # every invocation, and it is handed to helpers that are free to mutate it.
    if additional_options is None:
        additional_options = {}

    sdk_cmd.run_cli("package install --cli {} --yes".format(package_name))
    version = 'stub-universe'
    log.info('Upgrading to test version: {} {}'.format(package_name, version))
    _upgrade_or_downgrade(
        package_name,
        version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment)

    # Default Universe is at --index=0
    version = _get_pkg_version(package_name)
    log.info('Downgrading to Universe version: {} {}'.format(package_name, version))
    _upgrade_or_downgrade(
        package_name,
        version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment)
Beispiel #2
0
def add_universe_repos():
    """Register the stub universe repositories named in STUB_UNIVERSE_URL.

    STUB_UNIVERSE_URL is read as a comma-separated list of URLs. Each URL is
    given a randomly generated 'testpkg-XXXXXXXX' repository name. Any
    already-registered repository pointing at one of those URLs is removed
    first, and each URL is then added at index 0.

    :return: dict mapping the generated repository name to its URL; empty when
        the environment variable is unset or contains no URLs.
    """
    traceback.print_stack()
    log.info('Adding universe repos')

    # prepare needed universe repositories
    # ''.split(',') yields [''], which is truthy, so blank entries must be filtered
    # out explicitly or an empty URL would be registered as a repository.
    raw_urls = os.environ.get('STUB_UNIVERSE_URL', '')
    stub_universe_urls = [url.strip() for url in raw_urls.split(',') if url.strip()]

    stub_urls = {}

    if not stub_universe_urls:
        return stub_urls

    log.info('Adding stub URLs: {}'.format(stub_universe_urls))
    name_alphabet = string.ascii_lowercase + string.digits
    for url in stub_universe_urls:
        package_name = 'testpkg-' + ''.join(
            random.choice(name_alphabet) for _ in range(8))
        stub_urls[package_name] = url

    # clean up any duplicate repositories
    wanted_urls = set(stub_urls.values())
    current_universes = sdk_cmd.run_cli('package repo list --json')
    for repo in json.loads(current_universes)['repositories']:
        if repo['uri'] in wanted_urls:
            log.info('Removing duplicate stub URL: {}'.format(repo['uri']))
            sdk_cmd.run_cli('package repo remove {}'.format(repo['name']))

    # add the needed universe repositories
    for name, url in stub_urls.items():
        log.info('Adding stub URL: {}'.format(url))
        sdk_cmd.run_cli('package repo add --index=0 {} {}'.format(name, url))

    log.info('Finished adding universe repos')

    return stub_urls
Beispiel #3
0
def get_diagnostics_bundle(item: pytest.Item):
    """Create a cluster diagnostics bundle and download it as a test artifact.

    Starts bundle creation via the CLI, polls the status every 5 seconds for up
    to 10 minutes until the job reports 100% progress, then downloads the bundle
    to the path produced by `setup_artifact_path(item, <bundle filename>)`.
    Failures are logged rather than raised.

    :param item: The pytest item the artifact path is derived from.
    """
    rc, _, _ = sdk_cmd.run_raw_cli('node diagnostics create all')
    if rc:
        log.error('Diagnostics bundle creation failed.')
        return

    @retrying.retry(wait_fixed=5000,
                    stop_max_delay=10 * 60 * 1000,
                    retry_on_result=lambda result: result is None)
    def wait_for_bundle_file():
        # Returns None (triggering another retry) until the bundle is complete.
        rc, stdout, _ = sdk_cmd.run_raw_cli(
            'node diagnostics --status --json')
        if rc:
            return None

        # e.g. { "some-ip": { stuff we want } }
        status = next(iter(json.loads(stdout).values()))
        if status['job_progress_percentage'] != 100:
            return None

        # e.g. "/var/lib/dcos/dcos-diagnostics/diag-bundles/bundle-2018-01-11-1515698691.zip"
        return os.path.basename(status['last_bundle_dir'])

    try:
        bundle_filename = wait_for_bundle_file()
    except retrying.RetryError:
        # retrying signals "stop_max_delay exceeded while the result was still
        # rejected" by raising, not by returning None. Without this handler the
        # "giving up" branch below could not be reached on a timeout.
        bundle_filename = None

    if bundle_filename:
        sdk_cmd.run_cli('node diagnostics download {} --location={}'.format(
            bundle_filename, setup_artifact_path(item, bundle_filename)))
    else:
        log.error('Diagnostics bundle didnt finish in time, giving up.')
Beispiel #4
0
def _upgrade_or_downgrade(
        package_name,
        to_package_version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment):
    """Move a running service to `to_package_version` and optionally verify the rollout.

    On DC/OS older than 1.10, or when `shakedown.ee_version()` is None, the
    Marathon app is destroyed and the package reinstalled at the target version.
    Otherwise the service CLI's `update start` command is used and the package
    CLI is reinstalled at the matching version.

    When `wait_for_deployment` is true, the service config before and after is
    compared to decide whether tasks are expected to have been restarted, and
    the deployment plan is then awaited for up to `timeout_seconds`.
    """
    # Snapshot taken before the change, for comparison once the update is issued.
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    use_marathon_flow = (
        sdk_utils.dcos_version_less_than("1.10") or shakedown.ee_version() is None)

    if use_marathon_flow:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        update_cmd = 'update start --package-version={}'.format(to_package_version)
        if additional_options:
            # The options file must still exist while the CLI runs, so the call
            # stays inside the `with` block.
            with tempfile.NamedTemporaryFile() as options_file:
                options_file.write(json.dumps(additional_options).encode('utf-8'))
                options_file.flush()  # make the json content visible to the CLI reading it below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    '{} --options={}'.format(update_cmd, options_file.name))
        else:
            sdk_cmd.svc_cli(package_name, service_name, update_cmd)
        # The package CLI is not upgraded automatically in this flow, so do it by hand
        # (doing it automatically would mean the package CLI replacing itself through the main CLI).
        sdk_cmd.run_cli(
            'package install --yes --cli --package-version={} {}'.format(to_package_version, package_name))

    if not wait_for_deployment:
        return

    updated_config = get_config(package_name, service_name)

    if updated_config == initial_config:
        log.info('No config change detected. Tasks should not be restarted')
        sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)
    else:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

    # This can take a while, default is 15 minutes. With HDFS for example, the expected
    # total task count can be reached via ONCE tasks before deployment actually completes.
    log.info("Waiting for package={} service={} to finish deployment plan...".format(
        package_name, service_name))
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
Beispiel #5
0
def create_service_account(service_account_name: str,
                           service_account_secret: str) -> None:
    """Create a service account and its secret through the enterprise security CLI.

    Installs `dcos-enterprise-cli`, deletes any existing account/secret via
    `delete_service_account`, generates a keypair as `private-key.pem` /
    `public-key.pem` (relative paths), creates the account from the public key,
    and creates the service-account secret from the private key.

    :param service_account_name: Name of the service account to create.
    :param service_account_secret: Name of the secret to create for the account.
    """
    names = {'account': service_account_name, 'secret': service_account_secret}

    log.info('Creating service account for account={account} secret={secret}'.format(**names))

    log.info('Install cli necessary for security')
    sdk_cmd.run_cli('package install dcos-enterprise-cli --yes')

    log.info('Remove any existing service account and/or secret')
    delete_service_account(service_account_name, service_account_secret)

    log.info('Create keypair')
    sdk_cmd.run_cli(
        'security org service-accounts keypair private-key.pem public-key.pem')

    log.info('Create service account')
    create_account_cmd = (
        'security org service-accounts create -p public-key.pem -d "Service account for integration tests" "{account}"'
    ).format(account=service_account_name)
    sdk_cmd.run_cli(create_account_cmd)

    log.info('Create secret')
    create_secret_cmd = (
        'security secrets create-sa-secret --strict private-key.pem "{account}" "{secret}"'
    ).format(**names)
    sdk_cmd.run_cli(create_secret_cmd)

    log.info('Service account created for account={account} secret={secret}'.format(**names))
Beispiel #6
0
def _get_universe_url():
    """Return the URI of the package repository named 'Universe'.

    :return: The `uri` field of the first repository whose name is 'Universe'.
    :raises AssertionError: If no repository with that name is registered.
    """
    repositories = json.loads(sdk_cmd.run_cli('package repo list --json'))['repositories']
    for repo in repositories:
        if repo['name'] == 'Universe':
            log.info("Found Universe URL: {}".format(repo['uri']))
            return repo['uri']
    # Raise explicitly instead of `assert False`: assert statements are stripped under
    # `python -O`, which would make this function silently return None.
    raise AssertionError(
        "Unable to find 'Universe' in list of repos: {}".format(repositories))
Beispiel #7
0
def delete_service_account(service_account_name: str,
                           service_account_secret: str) -> None:
    """
    Remove a service account, its local keypair files, and its secret.

    :param service_account_name: Name of the service account to delete.
    :param service_account_secret: Name of the secret to delete afterwards.
    """
    # Result is not inspected here; failures are meant to be ignored.
    delete_cmd = "security org service-accounts delete {name}".format(
        name=service_account_name)
    sdk_cmd.run_cli(delete_cmd)

    # Clean up the files left behind by the service-accounts keypair command.
    for pem_path in ('private-key.pem', 'public-key.pem'):
        try:
            os.unlink(pem_path)
        except OSError:
            # e.g. the file was never created; nothing to clean up.
            continue

    delete_secret(secret=service_account_secret)
Beispiel #8
0
def get_task_ids():
    """Yield the fifth whitespace-separated column of each `dcos task --all` row.

    The plain-text output is parsed instead of the JSON output because the JSON
    variant can return the wrong user for schedulers.
    """
    output_lines = sdk_cmd.run_cli('task --all', print_output=False).split('\n')
    # Skip the header row, then split every remaining row into its columns.
    for columns in map(str.split, output_lines[1:]):
        if len(columns) >= 5:
            yield columns[4]
Beispiel #9
0
def _get_kdc_task(task_name: str) -> dict:
    """
    Look up a task by name in the output of `dcos task --json`.

    :param task_name (str): Name of the task to find.
    :return (dict): The first task object whose "name" equals `task_name`.
    :raises RuntimeError: If the CLI returned nothing or no task has that name.
    """
    log.info("Getting KDC task")
    raw_tasks = sdk_cmd.run_cli("task --json")
    # Empty CLI output means there is nothing to search.
    candidates = json.loads(raw_tasks) if raw_tasks else ()
    for candidate in candidates:
        if candidate["name"] == task_name:
            return candidate

    raise RuntimeError("Expecting marathon KDC task but no such task found. Running tasks: {tasks}".format(
        tasks=raw_tasks))
Beispiel #10
0
def _get_host_name(host_id: str) -> str:
    """
    Fetches the host name for the host running the KDC app.

    :param host_id (str): The ID of the host, used to look up the appropriate node.
    :return (str): Name of the host running the KDC app.
    :raises RuntimeError: If the CLI returned nothing or no node has the given ID.
    """
    log.info("Getting hostname")
    raw_nodes = sdk_cmd.run_cli("node --json")
    if raw_nodes:
        nodes = json.loads(raw_nodes)
        for node in nodes:
            if "id" in node and node["id"] == host_id:
                log.info("Host name is %s", node["hostname"])
                return node["hostname"]

    # Include the raw node listing in the error; previously the "{nodes}"
    # placeholder was emitted literally because the string was never formatted.
    raise RuntimeError("Failed to get name of host running the KDC app: {nodes}".format(
        nodes=raw_nodes))
Beispiel #11
0
def get_task_files_for_id(task_id: str) -> dict:
    """List the files of a task via `dcos task ls --long --all`.

    Each output line is matched against `task_ls_pattern`; lines that do not
    match are logged and skipped.

    :param task_id: ID of the task whose files are listed.
    :return: dict mapping filename to a timestamp string formatted as 'MMDD_HHMM'.
        An empty dict is returned if anything goes wrong (best effort).
    """
    try:
        ls_lines = sdk_cmd.run_cli(
            'task ls --long --all {}'.format(task_id)).split('\n')
        ret = {}
        for line in ls_lines:
            match = task_ls_pattern.match(line)
            if not match:
                log.warning('Unable to parse line: {}'.format(line))
                continue
            # match.group(1): "4096  ", match.group(2): "Jul 21 22:07", match.group(3): "jre1.8.0_144  "
            filename = match.group(3).strip()
            # build timestamp for use in output filename: 'Jul 21 22:07' => '0721_2207'
            timestamp = time.strftime(
                '%m%d_%H%M', time.strptime(match.group(2), '%b %d %H:%M'))
            ret[filename] = timestamp
        return ret
    except Exception:
        # Deliberately best-effort, but a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and make the run impossible to abort here.
        log.exception(
            'Failed to get list of files for task: {}'.format(task_id))
        return {}
Beispiel #12
0
def delete_secret(secret: str) -> None:
    """
    Delete the named secret through the security CLI.

    :param secret: Name of the secret to delete.
    """
    # Result is not inspected here; failures are meant to be ignored.
    delete_cmd = "security secrets delete {}".format(secret)
    sdk_cmd.run_cli(delete_cmd)
Beispiel #13
0
def _uninstall(package_name,
               service_name,
               role=None,
               service_account=None,
               zk=None):
    """Uninstall a package's service, using a flow chosen by DC/OS version.

    On DC/OS older than 1.10 the package is uninstalled and the janitor
    container is then run on the master with the given role, principal and
    ZK node. On newer versions the package is uninstalled and this function
    waits until Marathon has no deployments for the app and no longer lists it.

    :param package_name: Name of the package to uninstall.
    :param service_name: Name of the service instance; may contain slashes.
    :param role: Janitor role (-r); only used on DC/OS < 1.10.
        Defaults to '<deslashed service name>-role'.
    :param service_account: Janitor principal (-p); only used on DC/OS < 1.10.
        Defaults to '<service_name>-principal'.
    :param zk: Janitor ZK node (-z); only used on DC/OS < 1.10.
        Defaults to 'dcos-service-<deslashed service name>'.
    :raises dcos.errors.DCOSException, ValueError: Re-raised only when the
        exception text contains 'marathon'; other instances are logged and dropped.
    """
    # Only read by the timing log in the < 1.10 branch.
    start = time.time()

    if sdk_utils.dcos_version_less_than('1.10'):
        log.info('Uninstalling/janitoring {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(package_name,
                                                 service_name=service_name)
        except (dcos.errors.DCOSException, ValueError) as e:
            log.info('Got exception when uninstalling package, ' +
                     'continuing with janitor anyway: {}'.format(e))
            # Re-raising skips the janitor step below; presumably a caller retries
            # the whole uninstall -- TODO confirm where the retry lives.
            if 'marathon' in str(e):
                log.info(
                    'Detected a probable marathon flake. Raising so retry will trigger.'
                )
                raise

        janitor_start = time.time()

        # leading slash removed, other slashes converted to double underscores:
        deslashed_service_name = service_name.lstrip('/').replace('/', '__')
        if role is None:
            role = deslashed_service_name + '-role'
        if service_account is None:
            # NOTE(review): unlike role and zk, this default uses the raw
            # service_name (slashes intact) -- confirm that is intended.
            service_account = service_name + '-principal'
        if zk is None:
            zk = 'dcos-service-' + deslashed_service_name
        janitor_cmd = (
            'docker run mesosphere/janitor /janitor.py '
            '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
        # The auth token is read from the local CLI config and passed to the janitor.
        shakedown.run_command_on_master(
            janitor_cmd.format(role=role,
                               service_account=service_account,
                               zk=zk,
                               auth=sdk_cmd.run_cli(
                                   'config show core.dcos_acs_token',
                                   print_output=False).strip()))

        finish = time.time()

        # Report the package uninstall time, the janitor time, and their total.
        log.info(
            'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
                shakedown.pretty_duration(janitor_start - start),
                shakedown.pretty_duration(finish - janitor_start),
                shakedown.pretty_duration(finish - start)))
    else:
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(package_name,
                                                 service_name=service_name)
            # service_name may already contain a leading slash:
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info(
                'Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            # wait for service to be gone according to marathon
            client = shakedown.marathon.create_client()

            # Predicate polled below: true once no Marathon app has this exact id.
            def marathon_dropped_service():
                app_ids = [app['id'] for app in client.get_apps()]
                log.info('Marathon apps: {}'.format(app_ids))
                matching_app_ids = [
                    app_id for app_id in app_ids if app_id == marathon_app_id
                ]
                if len(matching_app_ids) > 1:
                    log.warning('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return len(matching_app_ids) == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service,
                                timeout_seconds=TIMEOUT_SECONDS)

        except (dcos.errors.DCOSException, ValueError) as e:
            log.info('Got exception when uninstalling package: {}'.format(e))
            # Only errors mentioning marathon are propagated; others are logged and dropped.
            if 'marathon' in str(e):
                log.info(
                    'Detected a probable marathon flake. Raising so retry will trigger.'
                )
                raise
        finally:
            # Runs whether or not the uninstall succeeded.
            sdk_utils.list_reserved_resources()