Esempio n. 1
0
def start_plan(service_name, plan, parameters=None):
    """Kick off the named plan on the service, POSTing optional plan parameters."""
    payload = {} if parameters is None else parameters
    sdk_cmd.service_request(
        "POST", service_name, "/v1/plans/{}/start".format(plan), json=payload)
def install_job(job_dict):
    """Install a Metronome job, first removing any existing job with the same id."""
    name = job_dict['id']

    # Best-effort delete of any job already registered under this id.
    _remove_job_by_name(name)

    log.info('Adding job {}:\n{}'.format(name, json.dumps(job_dict)))
    sdk_cmd.service_request('POST', 'metronome', '/v1/jobs', json=job_dict)
Esempio n. 3
0
def force_complete_step(service_name: str, plan: str, phase: str,
                        step: str) -> None:
    """Mark a single plan step as COMPLETE without actually running it."""
    endpoint = "/v1/plans/{}/forceComplete?phase={}&step={}".format(plan, phase, step)
    sdk_cmd.service_request("POST", service_name, endpoint)
Esempio n. 4
0
def copy_job(service_name, src_name, dst_name, timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """Clone an existing Jenkins job under a new name and make the copy buildable."""
    copy_path = 'createItem?name={}&mode=copy&from={}'.format(dst_name, src_name)
    sdk_cmd.service_request('POST', service_name, copy_path, timeout_seconds=timeout_seconds)

    # Copy starts jobs off disable and you have to disable them and enable again to get them "buildable"
    # https://github.com/entagen/jenkins-build-per-branch/issues/41
    disable_job(service_name, dst_name)
    enable_job(service_name, dst_name)
Esempio n. 5
0
def install_job(job_dict: Dict[str, Any]) -> None:
    """Install a Metronome job, replacing any pre-existing job with the same id."""
    name = job_dict["id"]

    # Delete any current job of the same name first (best-effort).
    _remove_job_by_name(name)

    log.info("Adding job {}:\n{}".format(name, json.dumps(job_dict)))
    sdk_cmd.service_request("POST", "metronome", "/v1/jobs", json=job_dict)
Esempio n. 6
0
def start_plan(service_name: str,
               plan: str,
               parameters: Optional[Dict[str, Any]] = None) -> None:
    """Start the given plan; any plan parameters are POSTed as the JSON body."""
    body = parameters if parameters is not None else {}
    sdk_cmd.service_request(
        "POST", service_name, "/v1/plans/{}/start".format(plan), json=body)
def test_add_deploy_restart_remove():
    """Multiservice lifecycle test: add a service, verify it deploys, restart
    the scheduler via Marathon, confirm it recovers, then remove the service.
    """
    svc1 = 'test1'

    # add svc as test1:
    sdk_cmd.service_request('POST',
                            config.SERVICE_NAME,
                            '/v1/multi/{}?yaml=svc'.format(svc1),
                            json=service_params(svc1))
    # get list, should immediately have new entry:
    service = get_service_list()[0]
    assert service['service'] == svc1
    assert service['yaml'] == 'svc'
    assert not service['uninstall']

    # wait for the added service's own deploy plan to complete:
    sdk_plan.wait_for_plan_status(config.SERVICE_NAME,
                                  'deploy',
                                  'COMPLETE',
                                  multiservice_name=svc1)

    task_ids = sdk_tasks.get_task_ids('marathon', config.SERVICE_NAME)
    log.info('list of task ids {}'.format(task_ids))
    old_task_id = task_ids[0]

    # restart and check that service is recovered:
    sdk_marathon.restart_app(config.SERVICE_NAME)
    sdk_marathon.wait_for_app_running(config.SERVICE_NAME,
                                      sdk_marathon.TIMEOUT_SECONDS)

    # check that scheduler task was relaunched
    check_scheduler_relaunched(config.SERVICE_NAME, old_task_id)

    # the service entry should survive the scheduler restart unchanged:
    service = wait_for_service_count(1)[0]
    assert service['service'] == svc1
    assert service['yaml'] == 'svc'
    assert not service['uninstall']

    plan = sdk_plan.wait_for_plan_status(config.SERVICE_NAME,
                                         'deploy',
                                         'COMPLETE',
                                         multiservice_name=svc1)
    # verify that svc.yml was deployed as svc1:
    assert sdk_plan.get_all_step_names(plan) == [
        'hello-0:[server]', 'world-0:[server]', 'world-1:[server]'
    ]

    # trigger service removal, wait for removal:
    sdk_cmd.service_request('DELETE', config.SERVICE_NAME,
                            '/v1/multi/{}'.format(svc1))
    # check delete bit is set. however, be permissive of service being removed VERY quickly:
    services = get_service_list()
    assert len(services) <= 1
    for service in services:
        assert service['service'] == svc1
        assert service['yaml'] == 'svc'
        assert service['uninstall']
    wait_for_service_count(0)
def test_add_deploy_restart_remove():
    """Multiservice lifecycle test: add a service, verify it deploys, restart
    the scheduler via Marathon, confirm it recovers, then remove the service.
    """
    svc1 = "test1"

    # add svc as test1:
    sdk_cmd.service_request("POST",
                            config.SERVICE_NAME,
                            "/v1/multi/{}?yaml=svc".format(svc1),
                            json=service_params(svc1))
    # get list, should immediately have new entry:
    service = get_service_list()[0]
    assert service["service"] == svc1
    assert service["yaml"] == "svc"
    assert not service["uninstall"]

    # wait for the added service's own deploy plan to complete:
    sdk_plan.wait_for_plan_status(config.SERVICE_NAME,
                                  "deploy",
                                  "COMPLETE",
                                  multiservice_name=svc1)

    task_ids = sdk_tasks.get_task_ids("marathon", config.SERVICE_NAME)
    log.info("list of task ids {}".format(task_ids))
    old_task_id = task_ids[0]

    # restart and check that service is recovered:
    sdk_marathon.restart_app(config.SERVICE_NAME)

    # check that scheduler task was relaunched
    sdk_tasks.check_scheduler_relaunched(config.SERVICE_NAME, old_task_id)

    # the service entry should survive the scheduler restart unchanged:
    service = wait_for_service_count(1)[0]
    assert service["service"] == svc1
    assert service["yaml"] == "svc"
    assert not service["uninstall"]

    plan = sdk_plan.wait_for_plan_status(config.SERVICE_NAME,
                                         "deploy",
                                         "COMPLETE",
                                         multiservice_name=svc1)
    # verify that svc.yml was deployed as svc1:
    assert sdk_plan.get_all_step_names(plan) == [
        "hello-0:[server]",
        "world-0:[server]",
        "world-1:[server]",
    ]

    # trigger service removal, wait for removal:
    sdk_cmd.service_request("DELETE", config.SERVICE_NAME,
                            "/v1/multi/{}".format(svc1))
    # check delete bit is set. however, be permissive of service being removed VERY quickly:
    services = get_service_list()
    assert len(services) <= 1
    for service in services:
        assert service["service"] == svc1
        assert service["yaml"] == "svc"
        assert service["uninstall"]
    wait_for_service_count(0)
def _remove_job_by_name(job_name):
    """Best-effort removal of any existing Metronome job named job_name.

    Failures (typically: the job doesn't exist yet) are logged and swallowed.
    """
    try:
        # Metronome doesn't understand 'True' -- only 'true' will do.
        sdk_cmd.service_request(
            'DELETE', 'metronome', '/v1/jobs/{}'.format(job_name),
            retry=False,
            params={'stopCurrentJobRuns': 'true'})
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit are
        # no longer silently swallowed along with real request failures.
        log.info('Failed to remove any existing job named {} (this is likely as expected):\n{}'.format(
            job_name, traceback.format_exc()))
Esempio n. 10
0
def require_spark(service_name=SPARK_SERVICE_NAME, additional_options=None, zk='spark_mesos_dispatcher'):
    """(Re)install a Spark dispatcher and wait until it responds over HTTP.

    additional_options: optional dict of extra package options merged via
    get_spark_options(). Defaults to no extra options.
    """
    # Fix: the original used a mutable default argument ({}), which is shared
    # across calls and would leak mutations between invocations.
    if additional_options is None:
        additional_options = {}

    teardown_spark(service_name, zk)

    sdk_install.install(
        SPARK_PACKAGE_NAME,
        service_name,
        0,
        additional_options=get_spark_options(service_name, additional_options),
        wait_for_deployment=False, # no deploy plan
        insert_strict_options=False) # lacks principal + secret_name options

    # wait for dispatcher to be reachable over HTTP
    sdk_cmd.service_request('GET', service_name, '', timeout_seconds=300)
Esempio n. 11
0
def _remove_job_by_name(job_name):
    """Best-effort delete of a Metronome job; failures are logged, not raised."""
    # Metronome doesn't understand 'True' -- only 'true' will do.
    delete_params = {"stopCurrentJobRuns": "true"}
    try:
        sdk_cmd.service_request(
            "DELETE", "metronome", "/v1/jobs/{}".format(job_name),
            retry=False, params=delete_params)
    except Exception as e:
        log.info(
            "Failed to remove any existing job named {} (this is likely as expected):\n{}"
            .format(job_name, e))
Esempio n. 12
0
def _dump_threads(item: pytest.Item, service_name: str):
    """Fetch the scheduler's thread dump and write it to a per-test artifact file."""
    # Fix: '.text' was missing -- service_request returns a Response object,
    # and both len() and file.write() below need the body string (compare the
    # sibling implementation that reads response.text).
    threads = sdk_cmd.service_request('GET', service_name, 'v1/debug/threads').text
    out_path = _setup_artifact_path(item, 'threads_{}.out'.format(service_name.replace('/', '_')))
    log.info('=> Writing {} ({} bytes)'.format(out_path, len(threads)))
    with open(out_path, 'w') as f:
        f.write(threads)
        f.write('\n')  # ... and a trailing newline
Esempio n. 13
0
def test_cni_labels():
    """Verify the CNI network labels attached to the hello-overlay-vip-0 pod.

    Expects exactly the two key/value pairs defined in the module-level
    EXPECTED_NETWORK_LABELS mapping.
    """
    def check_labels(labels, idx):
        # Validate a single {key, value} label entry against the expected map.
        k = labels[idx]["key"]
        v = labels[idx]["value"]
        assert k in EXPECTED_NETWORK_LABELS.keys(
        ), "Got unexpected network key {}".format(k)
        assert v == EXPECTED_NETWORK_LABELS[k], "Value {obs} isn't correct, should be " \
                                                "{exp}".format(obs=v, exp=EXPECTED_NETWORK_LABELS[k])

    r = sdk_cmd.service_request('GET', config.SERVICE_NAME,
                                "/v1/pod/hello-overlay-vip-0/info").json()
    assert len(
        r) == 1, "Got multiple responses from v1/pod/hello-overlay-vip-0/info"
    try:
        # Labels live deep in the executor's network info; a KeyError anywhere
        # on this path means they weren't set at all.
        cni_labels = r[0]["info"]["executor"]["container"]["networkInfos"][0][
            "labels"]["labels"]
    except KeyError:
        assert False, "CNI labels not present"
    assert len(cni_labels) == 2, "Got {} labels, should be 2".format(
        len(cni_labels))
    for i in range(2):
        try:
            check_labels(cni_labels, i)
        except KeyError:
            assert False, "Couldn't get CNI labels from {}".format(cni_labels)
def test_all_tasks_are_launched():
    """Install with manual plans only, run all of them, then verify that every
    task with a status has a matching (non-empty) task id in its info.

    NOTE(review): 'foldered_name' is presumably a module-level service name --
    confirm against the enclosing module.
    """
    service_options = {"service": {"yaml": "plan"}}
    sdk_install.install(config.PACKAGE_NAME,
                        foldered_name,
                        0,
                        additional_options=service_options,
                        wait_for_deployment=False,
                        wait_for_all_conditions=True)
    # after above method returns, start all plans right away.
    plans = ["manual-plan-0", "manual-plan-1", "manual-plan-2"]
    for plan in plans:
        sdk_plan.start_plan(foldered_name, plan)
    for plan in plans:
        sdk_plan.wait_for_completed_plan(foldered_name, plan)
    pods = ["custom-pod-A-0", "custom-pod-B-0", "custom-pod-C-0"]
    for pod in pods:
        # /pod/<pod-id>/info fetches data from SDK's persistence layer
        pod_hello_0_info = sdk_cmd.service_request(
            "GET", foldered_name, "/v1/pod/{}/info".format(pod)).json()
        for taskInfoAndStatus in pod_hello_0_info:
            info = taskInfoAndStatus["info"]
            status = taskInfoAndStatus["status"]
            # While `info` object is always present, `status` may or may not be present based
            # on whether the task was launched and we received an update from mesos (or not).
            if status:
                assert info["taskId"]["value"] == status["taskId"]["value"]
                assert len(info["taskId"]["value"]) > 0
            else:
                assert len(info["taskId"]["value"]) == 0
Esempio n. 15
0
def fault_domain_vars_are_present(pod_instance):
    """Return True iff the pod's environment defines non-empty REGION and ZONE variables."""
    info = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/{}/info'.format(pod_instance)).json()[0]['info']
    variables = info['command']['environment']['variables']
    # Fix: extract the variable *values*. The original kept the whole var dict
    # and compared it against the string sentinel 'NO_REGION'/'NO_ZONE', which
    # never matched (dict/list != str), so the function always returned True.
    region = next((var['value'] for var in variables if var['name'] == 'REGION'), '')
    zone = next((var['value'] for var in variables if var['name'] == 'ZONE'), '')

    return len(region) > 0 and len(zone) > 0
Esempio n. 16
0
    def wait():
        """Poll helper for the enclosing job runner: returns True once the
        closure's run_id appears among the job's successful finished runs.

        Raises early if the run shows up as failed and raise_on_failure is set
        (both names come from the enclosing scope).
        """
        # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
        # only works for active runs -- for whatever reason the run will disappear after it's done.
        # Therefore we have to query the full run history from the parent job and find our run_id there.
        run_history = sdk_cmd.service_request('GET',
                                              'metronome',
                                              '/v1/jobs/{}'.format(job_name),
                                              retry=False,
                                              params={
                                                  'embed': 'history'
                                              }).json()['history']

        successful_run_ids = [
            run['id'] for run in run_history['successfulFinishedRuns']
        ]
        failed_run_ids = [
            run['id'] for run in run_history['failedFinishedRuns']
        ]

        log.info(
            'Job {} run history (waiting for successful {}): successful={} failed={}'
            .format(job_name, run_id, successful_run_ids, failed_run_ids))

        # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
        #       Instead it will just keep restarting automatically until it succeeds or is deleted.
        if raise_on_failure and run_id in failed_run_ids:
            raise Exception(
                'Job {} with id {} has failed, exiting early'.format(
                    job_name, run_id))

        return run_id in successful_run_ids
Esempio n. 17
0
def test_cni_labels():
    """Verify that exactly two expected CNI network labels are present on the
    overlay-vip-0 pod's executor network info.
    """
    def check_labels(labels, idx):
        # Validate one {key, value} label entry against the expected mapping.
        k = labels[idx]["key"]
        v = labels[idx]["value"]

        expected_network_labels = {"key0": "val0", "key1": "val1"}
        assert k in expected_network_labels.keys(), "Got unexpected network key {}".format(k)
        assert v == expected_network_labels[k], (
            "Value {obs} isn't correct, should be "
            "{exp}".format(obs=v, exp=expected_network_labels[k])
        )

    r = sdk_cmd.service_request(
        "GET", config.SERVICE_NAME, "/v1/pod/overlay-vip-0/info"
    ).json()
    assert len(r) == 1, "Got multiple responses from v1/pod/overlay-vip-0/info"
    try:
        # A KeyError anywhere on this path means the labels weren't set at all.
        cni_labels = r[0]["info"]["executor"]["container"]["networkInfos"][0]["labels"]["labels"]
    except KeyError:
        assert False, "CNI labels not present"
    assert len(cni_labels) == 2, "Got {} labels, should be 2".format(len(cni_labels))
    for i in range(2):
        try:
            check_labels(cni_labels, i)
        except KeyError:
            assert False, "Couldn't get CNI labels from {}".format(cni_labels)
def configure_package(configure_security):
    """Session fixture: install the scheduler in dynamic multiservice mode,
    yield for the tests, and always uninstall afterwards.
    """
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        options = {
            "service": {
                # empty yaml: start in dynamic multiservice mode
                "yaml": ""
            }
        }

        # do not poll scheduler-level deploy plan, there is none:
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            0,
            additional_options=options,
            wait_for_deployment=False,
        )

        # use yaml list as a proxy for checking that the scheduler is up:
        yamls = sdk_cmd.service_request("GET", config.SERVICE_NAME,
                                        "/v1/multi/yaml").json()
        assert "svc" in yamls

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
Esempio n. 19
0
def test_task_dns_prefix_points_to_all_tasks():
    """Check DiscoveryInfo naming on every hello-0 task, then wait for deploy."""
    pod_info = sdk_cmd.service_request('GET', config.SERVICE_NAME,
                                       '/v1/pod/hello-0/info').json()
    # Assert that DiscoveryInfo is correctly set on tasks.
    for task in pod_info:
        assert task["info"]["discovery"]["name"] == "hello-0"
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
Esempio n. 20
0
    def wait() -> bool:
        """Poll helper for the enclosing job runner: returns True once the
        closure's run_id is among the job's successful finished runs; raises
        early if it failed and raise_on_failure is set (both names come from
        the enclosing scope).
        """
        # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
        # only works for active runs -- for whatever reason the run will disappear after it's done.
        # Therefore we have to query the full run history from the parent job and find our run_id there.
        run_history = sdk_cmd.service_request(
            "GET",
            "metronome",
            "/v1/jobs/{}".format(job_name),
            retry=False,
            params={"embed": "history"},
        ).json()["history"]

        successful_run_ids = [run["id"] for run in run_history["successfulFinishedRuns"]]
        failed_run_ids = [run["id"] for run in run_history["failedFinishedRuns"]]

        log.info(
            "Job {} run history (waiting for successful {}): successful={} failed={}".format(
                job_name, run_id, successful_run_ids, failed_run_ids
            )
        )

        # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
        #       Instead it will just keep restarting automatically until it succeeds or is deleted.
        if raise_on_failure and run_id in failed_run_ids:
            raise Exception("Job {} with id {} has failed, exiting early".format(job_name, run_id))

        return run_id in successful_run_ids
Esempio n. 21
0
def _get_jenkins_json(service_name,
                      path,
                      timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """GET the given Jenkins path and return the parsed JSON body."""
    response = sdk_cmd.service_request('GET', service_name, path,
                                       timeout_seconds=timeout_seconds)
    return response.json()
Esempio n. 22
0
def _remove_job_by_name(job_name: str) -> None:
    """Delete any existing Metronome job with this name; log (not raise) failures."""
    # Metronome doesn't understand 'True' -- only 'true' will do.
    delete_params = {"stopCurrentJobRuns": "true"}
    try:
        sdk_cmd.service_request(
            "DELETE", "metronome", "/v1/jobs/{}".format(job_name),
            retry=False, params=delete_params)
    except Exception as e:
        log.info(
            "Failed to remove any existing job named {} (this is likely as expected):\n{}".format(
                job_name, e
            )
        )
def make_post(post_body, service_name, **kwargs):
    """POST a Groovy script body to the Jenkins scriptText endpoint.

    :rtype: requests.Response
    """
    import sdk_cmd

    body = IMPORTS + post_body
    log.info('\nMaking request : ========\n{}\n========\n'.format(body))
    # Note: To run locally:
    # curl -i -H "Authorization:token=$(dcos config show core.dcos_acs_token)" \
    #      -k --data-urlencode "script=$(< <path-to-above-script-file>)" \
    #      https://<dcos-cluster>/service/jenkins/scriptText'
    return sdk_cmd.service_request(
        'POST', service_name, 'scriptText',
        log_args=False,
        data={'script': body},
        **kwargs,
    )
Esempio n. 24
0
def fault_domain_vars_are_present(pod_instance):
    """Return True iff the pod's environment defines non-empty REGION and ZONE variables."""
    info = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/{}/info'.format(pod_instance)).json()[0]['info']
    variables = info['command']['environment']['variables']
    # Fix: extract the variable *values*. The original kept the whole var dict
    # and compared it against the string sentinel 'NO_REGION'/'NO_ZONE', which
    # never matched (dict/list != str), so the function always returned True.
    region = next((var['value'] for var in variables if var['name'] == 'REGION'), '')
    zone = next((var['value'] for var in variables if var['name'] == 'ZONE'), '')

    return len(region) > 0 and len(zone) > 0
Esempio n. 25
0
def list_plans(service_name, timeout_seconds=TIMEOUT_SECONDS, multiservice_name=None):
    """Return the scheduler's plan list, optionally scoped to a multiservice."""
    path = ("/v1/plans" if multiservice_name is None
            else "/v1/service/{}/plans".format(multiservice_name))
    response = sdk_cmd.service_request("GET", service_name, path,
                                       timeout_seconds=timeout_seconds)
    return response.json()
Esempio n. 26
0
def get_scheduler_metrics(service_name: str, timeout_seconds: int = 15 * 60) -> Dict[str, Any]:
    """Returns a dict tree of Scheduler metrics fetched directly from the scheduler.
    Returned data will match the content of /service/<svc_name>/v1/metrics.
    """
    metrics = sdk_cmd.service_request("GET", service_name, "/v1/metrics").json()
    assert isinstance(metrics, dict)
    return metrics
Esempio n. 27
0
def _dump_threads(item: pytest.Item, service_name: str) -> None:
    """Capture the scheduler's thread dump into a per-test artifact file."""
    response = sdk_cmd.service_request(
        "GET", service_name, "v1/debug/threads", timeout_seconds=5
    )
    threads = response.text
    sanitized = service_name.replace("/", "_")
    out_path = _setup_artifact_path(item, "threads_{}.txt".format(sanitized))
    log.info("=> Writing {} ({} bytes)".format(out_path, len(threads)))
    with open(out_path, "w") as f:
        # Dump body plus a trailing newline.
        f.write(threads)
        f.write("\n")
Esempio n. 28
0
def get_pod_region(service_name, pod_name):
    """Return the 'offer_region' label value recorded on the pod's first task info."""
    info = sdk_cmd.service_request(
        'GET', service_name,
        '/v1/pod/{}/info'.format(pod_name)).json()[0]['info']

    matching = [label['value'] for label in info['labels']['labels']
                if label['key'] == 'offer_region']
    # IndexError here means the label was not set on the pod.
    return matching[0]
Esempio n. 29
0
 def wait_for_plan():
     """Fetch the plan from the closure's service_name/plan, treating HTTP 417
     (plan has errors) as a valid response rather than raising.
     """
     response = sdk_cmd.service_request(
         'GET', service_name, '/v1/plans/{}'.format(plan),
         retry=False,
         raise_on_error=False)
     if response.status_code == 417:
         return response # avoid throwing, return plan with errors
     response.raise_for_status()
     return response
Esempio n. 30
0
    def fault_domain_vars_are_present(pod_instance):
        """Return True iff the pod's environment defines non-empty REGION and ZONE variables."""
        info = sdk_cmd.service_request(
            "GET", config.SERVICE_NAME, "/v1/pod/{}/info".format(pod_instance), log_response=False
        ).json()[0]["info"]
        variables = info["command"]["environment"]["variables"]
        # Fix: extract the variable *values*. The previous code kept the whole
        # var dict and compared it against the string sentinel, which never
        # matched (dict/list != str), so the check always passed.
        region = next((var["value"] for var in variables if var["name"] == "REGION"), "")
        zone = next((var["value"] for var in variables if var["name"] == "ZONE"), "")

        return len(region) > 0 and len(zone) > 0
Esempio n. 31
0
 def wait_for_plan():
     """Fetch the plan from the closure's service_name/plan, treating HTTP 417
     (plan has errors) as a valid response rather than raising.
     """
     response = sdk_cmd.service_request('GET',
                                        service_name,
                                        '/v1/plans/{}'.format(plan),
                                        raise_on_error=False)
     if response.status_code == 417:
         return response  # avoid throwing, return plan with errors
     response.raise_for_status()
     return response
Esempio n. 32
0
def run_job(service_name,
            job_name,
            timeout_seconds=SHORT_TIMEOUT_SECONDS,
            **kwargs):
    """Trigger a parameterized Jenkins build; kwargs become the query string.

    NOTE(review): parameter values are not URL-encoded -- values containing
    '&', '=' or spaces would produce a malformed query. Confirm callers only
    pass URL-safe values.
    """
    query = '&'.join('{}={}'.format(key, value) for key, value in kwargs.items())
    path = 'job/{}/buildWithParameters?{}'.format(job_name, query)
    return sdk_cmd.service_request('POST',
                                   service_name,
                                   path,
                                   timeout_seconds=timeout_seconds)
Esempio n. 33
0
 def create_taskstatuses_file(self):
     """Fetch /v1/debug/taskStatuses from the scheduler and save it as a
     diagnostics artifact; logs an error instead of raising on HTTP failure.
     """
     response = sdk_cmd.service_request("GET", self.service_name, "/v1/debug/taskStatuses",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get scheduler task-statuses\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text
         )
     else:
         self.write_file("service_v1_debug_taskStatuses.json", response.text)
Esempio n. 34
0
 def configuration_target_id(self) -> List[str]:
     """Return the scheduler's target configuration id(s) parsed from JSON.

     NOTE(review): on HTTP failure this logs and falls through, implicitly
     returning None despite the List[str] annotation -- callers should guard.
     """
     response = sdk_cmd.service_request("GET", self.service_name, "/v1/configurations/targetId",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get scheduler configuration target id\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text
         )
     else:
         return json.loads(response.text)
Esempio n. 35
0
def _set_buildable(service_name, job_name, buildable, timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """Toggle a Jenkins job's buildable state via its enable/disable endpoint."""
    verb = 'enable' if buildable else 'disable'
    return sdk_cmd.service_request(
        'POST', service_name,
        'job/{}/{}'.format(job_name, verb),
        timeout_seconds=timeout_seconds)
Esempio n. 36
0
 def create_v2_offers_file(self):
     """Fetch /v2/debug/offers from the scheduler and save it as a diagnostics
     artifact; logs an error instead of raising on HTTP failure.
     """
     response = sdk_cmd.service_request("GET", self.service_name, "/v2/debug/offers",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get v2 scheduler offers\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text
         )
     else:
         self.write_file("service_v2_debug_offers.json", response.text)
Esempio n. 37
0
 def configuration(self, configuration_id) -> dict:
     """Return the scheduler configuration with the given id, parsed from JSON.

     NOTE(review): on HTTP failure this logs and falls through, implicitly
     returning None despite the dict annotation -- callers should guard.
     """
     response = sdk_cmd.service_request("GET", self.service_name,
                                        "/v1/configurations/{}".format(configuration_id),
                                        raise_on_error=False)
     if not response.ok:
         log.error("Could not get scheduler configuration with ID '%s'"
                   "\nstatus_code: '%s'\nstderr: '%s'",
                   configuration_id, response.status_code, response.text
                   )
     else:
         return json.loads(response.text)
Esempio n. 38
0
def get_plan_once(service_name, plan, multiservice_name=None):
    """Fetch the plan once, without retries.

    Returns the parsed JSON plan, or the raw 417 response when the plan has errors.
    """
    path = ("/v1/plans/{}".format(plan) if multiservice_name is None
            else "/v1/service/{}/plans/{}".format(multiservice_name, plan))

    response = sdk_cmd.service_request("GET", service_name, path, retry=False, raise_on_error=False)
    if response.status_code == 417:
        # Plan has errors: Avoid throwing an exception, return plan as-is.
        return response
    response.raise_for_status()
    return response.json()
Esempio n. 39
0
 def create_plans_file(self):
     """Fetch /v1/debug/plans from the scheduler and save it as a diagnostics
     artifact; logs an error instead of raising on HTTP failure.
     """
     response = sdk_cmd.service_request("GET",
                                        self.service_name,
                                        "/v1/debug/plans",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get scheduler plans\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text)
     else:
         self.write_file("service_v1_debug_plans.json", response.text)
Esempio n. 40
0
 def configuration_target_id(self) -> List[str]:
     """Return the scheduler's target configuration id(s) parsed from JSON.

     NOTE(review): on HTTP failure this logs and falls through, implicitly
     returning None despite the List[str] annotation -- callers should guard.
     """
     response = sdk_cmd.service_request("GET",
                                        self.service_name,
                                        "/v1/configurations/targetId",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get scheduler configuration target id\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text)
     else:
         return json.loads(response.text)
Esempio n. 41
0
 def create_offers_file(self):
     """Fetch the deprecated /v1/debug/offers page and save it as a diagnostics
     artifact; logs an error instead of raising on HTTP failure.
     """
     warnings.warn("The v1/debug/offers endpoint will be deprecated in favour of the newer "
                   "v2/debug/offers endpoint.", PendingDeprecationWarning)
     response = sdk_cmd.service_request("GET", self.service_name, "/v1/debug/offers",
                                        raise_on_error=False)
     if not response.ok:
         log.error(
             "Could not get scheduler offers\nstatus_code: '%s'\nstderr: '%s'",
             response.status_code, response.text
         )
     else:
         # v1 endpoint returns an HTML page, hence the .html artifact name.
         self.write_file("service_v1_debug_offers.html", response.text)
Esempio n. 42
0
def run_job(
    job_dict: Dict[str, Any],
    timeout_seconds: int = 600,
    raise_on_failure: bool = True,
) -> str:
    """Start a Metronome run of the given job and block until it succeeds.

    Returns the run id. Raises if the run shows up as failed (when
    raise_on_failure) or if it doesn't succeed within timeout_seconds.
    """
    job_name = job_dict["id"]

    # Start job run, get run ID to poll against:
    run_id = sdk_cmd.service_request(
        "POST", "metronome", "/v1/jobs/{}/runs".format(job_name), log_args=False
    ).json()["id"]
    assert isinstance(run_id, str)
    log.info("Started job {}: run id {}".format(job_name, run_id))

    # Wait for run to succeed, throw if run fails:
    @retrying.retry(
        wait_fixed=1000, stop_max_delay=timeout_seconds * 1000, retry_on_result=lambda res: not res
    )
    def wait() -> bool:
        # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
        # only works for active runs -- for whatever reason the run will disappear after it's done.
        # Therefore we have to query the full run history from the parent job and find our run_id there.
        run_history = sdk_cmd.service_request(
            "GET",
            "metronome",
            "/v1/jobs/{}".format(job_name),
            retry=False,
            params={"embed": "history"},
        ).json()["history"]

        successful_run_ids = [run["id"] for run in run_history["successfulFinishedRuns"]]
        failed_run_ids = [run["id"] for run in run_history["failedFinishedRuns"]]

        log.info(
            "Job {} run history (waiting for successful {}): successful={} failed={}".format(
                job_name, run_id, successful_run_ids, failed_run_ids
            )
        )

        # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
        #       Instead it will just keep restarting automatically until it succeeds or is deleted.
        if raise_on_failure and run_id in failed_run_ids:
            raise Exception("Job {} with id {} has failed, exiting early".format(job_name, run_id))

        return run_id in successful_run_ids

    wait()

    return run_id
Esempio n. 43
0
def run_job(job_dict, timeout_seconds=600, raise_on_failure=True):
    """Start a Metronome run of the given job and block until it succeeds.

    Returns the run id. Raises if the run shows up as failed (when
    raise_on_failure) or if it doesn't succeed within timeout_seconds.
    """
    job_name = job_dict['id']

    # Start job run, get run ID to poll against:
    run_id = sdk_cmd.service_request('POST', 'metronome', '/v1/jobs/{}/runs'.format(job_name), log_args=False).json()['id']
    log.info('Started job {}: run id {}'.format(job_name, run_id))

    # Wait for run to succeed, throw if run fails:
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=timeout_seconds*1000,
        retry_on_result=lambda res: not res)
    def wait():
        # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
        # only works for active runs -- for whatever reason the run will disappear after it's done.
        # Therefore we have to query the full run history from the parent job and find our run_id there.
        run_history = sdk_cmd.service_request(
            'GET', 'metronome', '/v1/jobs/{}'.format(job_name),
            retry=False,
            params={'embed': 'history'}).json()['history']

        successful_run_ids = [run['id'] for run in run_history['successfulFinishedRuns']]
        failed_run_ids = [run['id'] for run in run_history['failedFinishedRuns']]

        log.info('Job {} run history (waiting for successful {}): successful={} failed={}'.format(
            job_name, run_id, successful_run_ids, failed_run_ids))

        # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
        #       Instead it will just keep restarting automatically until it succeeds or is deleted.
        if raise_on_failure and run_id in failed_run_ids:
            raise Exception('Job {} with id {} has failed, exiting early'.format(job_name, run_id))

        return run_id in successful_run_ids

    wait()

    return run_id
Esempio n. 44
0
def test_cassandra_migration():
    """Migration test: write data to a backup cluster, back it up to S3, then
    restore into a second cluster and verify the data arrived.

    Cluster names, node addresses and AWS credentials come from environment
    variables (with config defaults).
    """
    backup_service_name = os.getenv('CASSANDRA_BACKUP_CLUSTER_NAME')
    restore_service_name = os.getenv('CASSANDRA_RESTORE_CLUSTER_NAME')

    backup_node_address = os.getenv('BACKUP_NODE_ADDRESS', config.DEFAULT_NODE_ADDRESS)
    backup_node_port = os.getenv('BACKUP_NODE_PORT', config.DEFAULT_NODE_PORT)

    # Jobs that seed, verify, delete and verify-deletion of the test data:
    backup_write_data_job = config.get_write_data_job(backup_node_address, backup_node_port)
    backup_verify_data_job = config.get_verify_data_job(backup_node_address, backup_node_port)
    backup_delete_data_job = config.get_delete_data_job(backup_node_address, backup_node_port)
    backup_verify_deletion_job = config.get_verify_deletion_job(backup_node_address, backup_node_port)

    plan_parameters = {
        'S3_BUCKET_NAME': os.getenv(
            'AWS_BUCKET_NAME', 'infinity-framework-test'
        ),
        'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
        'SNAPSHOT_NAME': str(uuid.uuid1()),
        'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
    }

    backup_install_job_context = sdk_jobs.InstallJobContext(
        [backup_write_data_job, backup_verify_data_job,
         backup_delete_data_job, backup_verify_deletion_job])
    backup_run_job_context = sdk_jobs.RunJobContext(
        before_jobs=[backup_write_data_job, backup_verify_data_job],
        after_jobs=[backup_delete_data_job, backup_verify_deletion_job])
    # Install and run the write/delete data jobs against backup cluster,
    # running dcos-cassandra-service
    with backup_install_job_context, backup_run_job_context:
        # Back this cluster up to S3
        backup_parameters = {
            'backup_name': plan_parameters['SNAPSHOT_NAME'],
            's3_access_key': plan_parameters['AWS_ACCESS_KEY_ID'],
            's3_secret_key': plan_parameters['AWS_SECRET_ACCESS_KEY'],
            'external_location': 's3://{}'.format(plan_parameters['S3_BUCKET_NAME']),
        }
        sdk_cmd.service_request('PUT', backup_service_name, '/v1/backup/start', json=backup_parameters)
        sdk_plan.wait_for_completed_deployment(backup_service_name)

    # Restore data to second instance:
    restore_node_address = os.getenv(
        'RESTORE_NODE_ADDRESS', sdk_hosts.autoip_host('sdk-cassandra', 'node-0-server'))
    restore_node_port = os.getenv('RESTORE_NODE_PORT', '9052')

    restore_write_data_job = config.get_write_data_job(restore_node_address, restore_node_port)
    restore_verify_data_job = config.get_verify_data_job(restore_node_address, restore_node_port)
    restore_delete_data_job = config.get_delete_data_job(restore_node_address, restore_node_port)
    restore_verify_deletion_job = config.get_verify_deletion_job(restore_node_address, restore_node_port)

    restore_install_job_context = sdk_jobs.InstallJobContext(
        [restore_write_data_job, restore_verify_data_job,
         restore_delete_data_job, restore_verify_deletion_job]
    )
    restore_run_job_context = sdk_jobs.RunJobContext(
        after_jobs=[restore_verify_data_job, restore_delete_data_job, restore_verify_deletion_job]
    )
    # Run the restore-s3 plan with the same snapshot parameters, then verify:
    with restore_install_job_context, restore_run_job_context:
        sdk_plan.start_plan(
            restore_service_name, 'restore-s3', parameters=plan_parameters
        )
        sdk_plan.wait_for_completed_plan(restore_service_name, 'restore-s3')
def get_pod_region(service_name, pod_name):
    """Return the 'offer_region' label value recorded on the pod's first task info."""
    info = sdk_cmd.service_request(
        'GET', service_name, '/v1/pod/{}/info'.format(pod_name)
    ).json()[0]['info']

    matching = [label['value'] for label in info['labels']['labels']
                if label['key'] == 'offer_region']
    # IndexError here means the label was not set on the pod.
    return matching[0]
Esempio n. 46
0
def get_scheduler_metrics(service_name, timeout_seconds=15*60):
    """Returns a dict tree of Scheduler metrics fetched directly from the scheduler.
    Returned data will match the content of /service/<svc_name>/v1/metrics.
    """
    response = sdk_cmd.service_request('GET', service_name, '/v1/metrics')
    return response.json()
Esempio n. 47
0
def start_plan(service_name, plan, parameters=None):
    """Start the named plan on the service; POSTs the parameters (or {}) as JSON."""
    body = {} if parameters is None else parameters
    sdk_cmd.service_request(
        'POST', service_name, '/v1/plans/{}/start'.format(plan), json=body)
Esempio n. 48
0
def test_task_dns_prefix_points_to_all_tasks():
    """Check DiscoveryInfo naming on every hello-0 task, then wait for deploy."""
    pod_info = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/hello-0/info').json()
    # Assert that DiscoveryInfo is correctly set on tasks.
    for task in pod_info:
        assert task["info"]["discovery"]["name"] == "hello-0"
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
Esempio n. 49
0
def list_plans(service_name, timeout_seconds=TIMEOUT_SECONDS):
    """Return the list of plans exposed by the service scheduler."""
    response = sdk_cmd.service_request('GET', service_name, '/v1/plans',
                                       timeout_seconds=timeout_seconds)
    return response.json()