def start_plan(service_name, plan, parameters=None):
    """Trigger a start of the named plan, optionally passing plan parameters."""
    payload = {} if parameters is None else parameters
    sdk_cmd.service_request(
        "POST", service_name, "/v1/plans/{}/start".format(plan), json=payload
    )
def install_job(job_dict):
    """Install a Metronome job, first removing any existing job with the same id."""
    job_name = job_dict['id']
    # attempt to delete current job, if any:
    _remove_job_by_name(job_name)
    log.info('Adding job {}:\n{}'.format(job_name, json.dumps(job_dict)))
    sdk_cmd.service_request('POST', 'metronome', '/v1/jobs', json=job_dict)
def force_complete_step(service_name: str, plan: str, phase: str, step: str) -> None:
    """Force-complete a single step within a phase of the given plan."""
    endpoint = "/v1/plans/{}/forceComplete?phase={}&step={}".format(plan, phase, step)
    sdk_cmd.service_request("POST", service_name, endpoint)
def copy_job(service_name, src_name, dst_name, timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """Clone the Jenkins job src_name into a new job named dst_name."""
    sdk_cmd.service_request(
        'POST',
        service_name,
        'createItem?name={}&mode=copy&from={}'.format(dst_name, src_name),
        timeout_seconds=timeout_seconds,
    )
    # Copy starts jobs off disable and you have to disable them and enable again to get them "buildable"
    # https://github.com/entagen/jenkins-build-per-branch/issues/41
    disable_job(service_name, dst_name)
    enable_job(service_name, dst_name)
def install_job(job_dict: Dict[str, Any]) -> None:
    """Install a Metronome job, first removing any prior job with the same id."""
    job_name = job_dict["id"]
    # attempt to delete current job, if any:
    _remove_job_by_name(job_name)
    log.info("Adding job {}:\n{}".format(job_name, json.dumps(job_dict)))
    sdk_cmd.service_request("POST", "metronome", "/v1/jobs", json=job_dict)
def start_plan(service_name: str, plan: str, parameters: Optional[Dict[str, Any]] = None) -> None:
    """Kick off the named plan; parameters (if given) are sent as the request body."""
    body = parameters if parameters is not None else {}
    sdk_cmd.service_request("POST", service_name, "/v1/plans/{}/start".format(plan), json=body)
def test_add_deploy_restart_remove(): svc1 = 'test1' # add svc as test1: sdk_cmd.service_request('POST', config.SERVICE_NAME, '/v1/multi/{}?yaml=svc'.format(svc1), json=service_params(svc1)) # get list, should immediately have new entry: service = get_service_list()[0] assert service['service'] == svc1 assert service['yaml'] == 'svc' assert not service['uninstall'] sdk_plan.wait_for_plan_status(config.SERVICE_NAME, 'deploy', 'COMPLETE', multiservice_name=svc1) task_ids = sdk_tasks.get_task_ids('marathon', config.SERVICE_NAME) log.info('list of task ids {}'.format(task_ids)) old_task_id = task_ids[0] # restart and check that service is recovered: sdk_marathon.restart_app(config.SERVICE_NAME) sdk_marathon.wait_for_app_running(config.SERVICE_NAME, sdk_marathon.TIMEOUT_SECONDS) # check that scheduler task was relaunched check_scheduler_relaunched(config.SERVICE_NAME, old_task_id) service = wait_for_service_count(1)[0] assert service['service'] == svc1 assert service['yaml'] == 'svc' assert not service['uninstall'] plan = sdk_plan.wait_for_plan_status(config.SERVICE_NAME, 'deploy', 'COMPLETE', multiservice_name=svc1) # verify that svc.yml was deployed as svc1: assert sdk_plan.get_all_step_names(plan) == [ 'hello-0:[server]', 'world-0:[server]', 'world-1:[server]' ] # trigger service removal, wait for removal: sdk_cmd.service_request('DELETE', config.SERVICE_NAME, '/v1/multi/{}'.format(svc1)) # check delete bit is set. however, be permissive of service being removed VERY quickly: services = get_service_list() assert len(services) <= 1 for service in services: assert service['service'] == svc1 assert service['yaml'] == 'svc' assert service['uninstall'] wait_for_service_count(0)
def test_add_deploy_restart_remove(): svc1 = "test1" # add svc as test1: sdk_cmd.service_request("POST", config.SERVICE_NAME, "/v1/multi/{}?yaml=svc".format(svc1), json=service_params(svc1)) # get list, should immediately have new entry: service = get_service_list()[0] assert service["service"] == svc1 assert service["yaml"] == "svc" assert not service["uninstall"] sdk_plan.wait_for_plan_status(config.SERVICE_NAME, "deploy", "COMPLETE", multiservice_name=svc1) task_ids = sdk_tasks.get_task_ids("marathon", config.SERVICE_NAME) log.info("list of task ids {}".format(task_ids)) old_task_id = task_ids[0] # restart and check that service is recovered: sdk_marathon.restart_app(config.SERVICE_NAME) # check that scheduler task was relaunched sdk_tasks.check_scheduler_relaunched(config.SERVICE_NAME, old_task_id) service = wait_for_service_count(1)[0] assert service["service"] == svc1 assert service["yaml"] == "svc" assert not service["uninstall"] plan = sdk_plan.wait_for_plan_status(config.SERVICE_NAME, "deploy", "COMPLETE", multiservice_name=svc1) # verify that svc.yml was deployed as svc1: assert sdk_plan.get_all_step_names(plan) == [ "hello-0:[server]", "world-0:[server]", "world-1:[server]", ] # trigger service removal, wait for removal: sdk_cmd.service_request("DELETE", config.SERVICE_NAME, "/v1/multi/{}".format(svc1)) # check delete bit is set. however, be permissive of service being removed VERY quickly: services = get_service_list() assert len(services) <= 1 for service in services: assert service["service"] == svc1 assert service["yaml"] == "svc" assert service["uninstall"] wait_for_service_count(0)
def _remove_job_by_name(job_name):
    """Best-effort removal of an existing Metronome job, stopping any active runs.

    Failures are logged rather than raised, since the job typically does not
    exist yet when this is called.
    """
    try:
        # Metronome doesn't understand 'True' -- only 'true' will do.
        sdk_cmd.service_request(
            'DELETE', 'metronome', '/v1/jobs/{}'.format(job_name),
            retry=False,
            params={'stopCurrentJobRuns': 'true'})
    except Exception:
        # Fix: was a bare 'except:', which also swallows SystemExit and
        # KeyboardInterrupt; narrowed to Exception, keeping best-effort behavior.
        log.info('Failed to remove any existing job named {} (this is likely as expected):\n{}'.format(
            job_name, traceback.format_exc()))
def require_spark(service_name=SPARK_SERVICE_NAME, additional_options=None, zk='spark_mesos_dispatcher'):
    """Ensure a freshly-installed Spark dispatcher named service_name is up and reachable.

    additional_options: optional dict of extra package options merged into the Spark config.
    zk: znode used by the dispatcher, cleared during teardown.
    """
    # Fix: 'additional_options={}' was a mutable default argument shared across
    # calls; a None sentinel preserves the same semantics safely.
    if additional_options is None:
        additional_options = {}
    teardown_spark(service_name, zk)
    sdk_install.install(
        SPARK_PACKAGE_NAME,
        service_name,
        0,
        additional_options=get_spark_options(service_name, additional_options),
        wait_for_deployment=False,  # no deploy plan
        insert_strict_options=False)  # lacks principal + secret_name options
    # wait for dispatcher to be reachable over HTTP
    sdk_cmd.service_request('GET', service_name, '', timeout_seconds=300)
def _remove_job_by_name(job_name):
    """Delete any existing Metronome job named job_name, stopping active runs.

    Errors are logged and ignored -- usually the job simply doesn't exist yet.
    """
    try:
        # Metronome doesn't understand 'True' -- only 'true' will do.
        sdk_cmd.service_request(
            "DELETE",
            "metronome",
            "/v1/jobs/{}".format(job_name),
            retry=False,
            params={"stopCurrentJobRuns": "true"},
        )
    except Exception as e:
        message = "Failed to remove any existing job named {} (this is likely as expected):\n{}"
        log.info(message.format(job_name, e))
def _dump_threads(item: pytest.Item, service_name: str):
    """Fetch the scheduler's thread dump and write it to a per-test artifact file."""
    # Fix: service_request() returns a Response object; '.text' extracts the body.
    # Without it, len() and f.write() below would fail on the Response itself
    # (the newer variant of this helper confirms '.text' is intended).
    threads = sdk_cmd.service_request('GET', service_name, 'v1/debug/threads').text
    out_path = _setup_artifact_path(item, 'threads_{}.out'.format(service_name.replace('/', '_')))
    log.info('=> Writing {} ({} bytes)'.format(out_path, len(threads)))
    with open(out_path, 'w') as f:
        f.write(threads)
        f.write('\n')  # ... and a trailing newline
def test_cni_labels():
    """Verify the overlay pod's executor carries exactly the expected CNI network labels."""

    def check_labels(labels, idx):
        key = labels[idx]["key"]
        value = labels[idx]["value"]
        assert key in EXPECTED_NETWORK_LABELS.keys(), "Got unexpected network key {}".format(key)
        assert value == EXPECTED_NETWORK_LABELS[key], (
            "Value {obs} isn't correct, should be "
            "{exp}".format(obs=value, exp=EXPECTED_NETWORK_LABELS[key]))

    r = sdk_cmd.service_request('GET', config.SERVICE_NAME, "/v1/pod/hello-overlay-vip-0/info").json()
    assert len(r) == 1, "Got multiple responses from v1/pod/hello-overlay-vip-0/info"
    try:
        cni_labels = r[0]["info"]["executor"]["container"]["networkInfos"][0]["labels"]["labels"]
    except KeyError:
        assert False, "CNI labels not present"
    assert len(cni_labels) == 2, "Got {} labels, should be 2".format(len(cni_labels))
    for i in range(2):
        try:
            check_labels(cni_labels, i)
        except KeyError:
            assert False, "Couldn't get CNI labels from {}".format(cni_labels)
def test_all_tasks_are_launched():
    """Installs the service without waiting for deployment, runs all manual plans,
    then checks via pod info that every launched task's TaskInfo and TaskStatus
    IDs are consistent."""
    service_options = {"service": {"yaml": "plan"}}
    sdk_install.install(config.PACKAGE_NAME, foldered_name, 0,
                        additional_options=service_options,
                        wait_for_deployment=False,
                        wait_for_all_conditions=True)
    # after above method returns, start all plans right away.
    plans = ["manual-plan-0", "manual-plan-1", "manual-plan-2"]
    for plan in plans:
        sdk_plan.start_plan(foldered_name, plan)
    for plan in plans:
        sdk_plan.wait_for_completed_plan(foldered_name, plan)
    pods = ["custom-pod-A-0", "custom-pod-B-0", "custom-pod-C-0"]
    for pod in pods:
        # /pod/<pod-id>/info fetches data from SDK's persistence layer
        pod_hello_0_info = sdk_cmd.service_request(
            "GET", foldered_name, "/v1/pod/{}/info".format(pod)).json()
        for taskInfoAndStatus in pod_hello_0_info:
            info = taskInfoAndStatus["info"]
            status = taskInfoAndStatus["status"]
            # While `info` object is always present, `status` may or may not be present based
            # on whether the task was launched and we received an update from mesos (or not).
            if status:
                assert info["taskId"]["value"] == status["taskId"]["value"]
                assert len(info["taskId"]["value"]) > 0
            else:
                # never launched: the stored TaskInfo must carry an empty task id
                assert len(info["taskId"]["value"]) == 0
def fault_domain_vars_are_present(pod_instance):
    """Return True if the pod's task environment defines non-empty REGION and ZONE variables."""
    info = sdk_cmd.service_request(
        'GET', config.SERVICE_NAME,
        '/v1/pod/{}/info'.format(pod_instance)).json()[0]['info']
    variables = info['command']['environment']['variables']
    # Fix: the old code compared the whole variable dict (or a list default) against
    # a string, so the check was always truthy regardless of the environment.
    # Extract the variable values and test them directly instead.
    region = next((var['value'] for var in variables if var['name'] == 'REGION'), None)
    zone = next((var['value'] for var in variables if var['name'] == 'ZONE'), None)
    return bool(region) and bool(zone)
def wait():
    """Poll closure: True once run_id appears in the job's successful run history."""
    # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
    # only works for active runs -- for whatever reason the run will disappear after it's done.
    # Therefore we have to query the full run history from the parent job and find our run_id there.
    history = sdk_cmd.service_request(
        'GET',
        'metronome',
        '/v1/jobs/{}'.format(job_name),
        retry=False,
        params={'embed': 'history'},
    ).json()['history']
    succeeded = [run['id'] for run in history['successfulFinishedRuns']]
    failed = [run['id'] for run in history['failedFinishedRuns']]
    log.info(
        'Job {} run history (waiting for successful {}): successful={} failed={}'.format(
            job_name, run_id, succeeded, failed))
    # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
    # Instead it will just keep restarting automatically until it succeeds or is deleted.
    if raise_on_failure and run_id in failed:
        raise Exception('Job {} with id {} has failed, exiting early'.format(job_name, run_id))
    return run_id in succeeded
def test_cni_labels():
    """Verify the overlay pod's executor carries exactly the two expected CNI network labels."""

    def check_labels(labels, idx):
        key = labels[idx]["key"]
        value = labels[idx]["value"]
        expected_network_labels = {"key0": "val0", "key1": "val1"}
        assert key in expected_network_labels.keys(), "Got unexpected network key {}".format(key)
        assert value == expected_network_labels[key], (
            "Value {obs} isn't correct, should be "
            "{exp}".format(obs=value, exp=expected_network_labels[key])
        )

    pod_info = sdk_cmd.service_request(
        "GET", config.SERVICE_NAME, "/v1/pod/overlay-vip-0/info"
    ).json()
    assert len(pod_info) == 1, "Got multiple responses from v1/pod/overlay-vip-0/info"
    try:
        executor = pod_info[0]["info"]["executor"]
        cni_labels = executor["container"]["networkInfos"][0]["labels"]["labels"]
    except KeyError:
        assert False, "CNI labels not present"
    assert len(cni_labels) == 2, "Got {} labels, should be 2".format(len(cni_labels))
    for i in range(2):
        try:
            check_labels(cni_labels, i)
        except KeyError:
            assert False, "Couldn't get CNI labels from {}".format(cni_labels)
def configure_package(configure_security):
    """Session fixture: install the package in dynamic multiservice mode, yield for
    the tests, then tear it down."""
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        # empty yaml: start in dynamic multiservice mode
        options = {"service": {"yaml": ""}}
        # do not poll scheduler-level deploy plan, there is none:
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            0,
            additional_options=options,
            wait_for_deployment=False,
        )
        # use yaml list as a proxy for checking that the scheduler is up:
        yamls = sdk_cmd.service_request("GET", config.SERVICE_NAME, "/v1/multi/yaml").json()
        assert "svc" in yamls

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
def test_task_dns_prefix_points_to_all_tasks():
    """Check that every hello-0 task advertises the shared DNS discovery name."""
    pod_info = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/hello-0/info').json()
    # Assert that DiscoveryInfo is correctly set on tasks.
    for task in pod_info:
        assert task["info"]["discovery"]["name"] == "hello-0"
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def wait() -> bool:
    """Poll closure: True once run_id shows up among the job's successful runs."""
    # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that
    # only works for active runs -- for whatever reason the run will disappear after it's done.
    # Therefore we have to query the full run history from the parent job and find our run_id there.
    response = sdk_cmd.service_request(
        "GET",
        "metronome",
        "/v1/jobs/{}".format(job_name),
        retry=False,
        params={"embed": "history"},
    )
    run_history = response.json()["history"]
    succeeded = [entry["id"] for entry in run_history["successfulFinishedRuns"]]
    failed = [entry["id"] for entry in run_history["failedFinishedRuns"]]
    log.info(
        "Job {} run history (waiting for successful {}): successful={} failed={}".format(
            job_name, run_id, succeeded, failed
        )
    )
    # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails.
    # Instead it will just keep restarting automatically until it succeeds or is deleted.
    if raise_on_failure and run_id in failed:
        raise Exception("Job {} with id {} has failed, exiting early".format(job_name, run_id))
    return run_id in succeeded
def _get_jenkins_json(service_name, path, timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """GET the given Jenkins path and return the parsed JSON body."""
    response = sdk_cmd.service_request('GET', service_name, path, timeout_seconds=timeout_seconds)
    return response.json()
def _remove_job_by_name(job_name: str) -> None:
    """Best-effort delete of a Metronome job and its active runs; logs on failure."""
    try:
        # Metronome doesn't understand 'True' -- only 'true' will do.
        sdk_cmd.service_request(
            "DELETE",
            "metronome",
            "/v1/jobs/{}".format(job_name),
            retry=False,
            params={"stopCurrentJobRuns": "true"},
        )
    except Exception as e:
        template = "Failed to remove any existing job named {} (this is likely as expected):\n{}"
        log.info(template.format(job_name, e))
def make_post(post_body, service_name, **kwargs):
    """POST a Groovy script to Jenkins' scriptText endpoint.

    :rtype: requests.Response
    """
    body = IMPORTS + post_body
    log.info('\nMaking request : ========\n{}\n========\n'.format(body))
    # Note: To run locally:
    #   curl -i -H "Authorization:token=$(dcos config show core.dcos_acs_token)" \
    #     -k --data-urlencode "script=$(< <path-to-above-script-file>)" \
    #     https://<dcos-cluster>/service/jenkins/scriptText'
    import sdk_cmd
    return sdk_cmd.service_request(
        'POST',
        service_name,
        'scriptText',
        log_args=False,
        data={'script': body},
        **kwargs,
    )
def list_plans(service_name, timeout_seconds=TIMEOUT_SECONDS, multiservice_name=None):
    """Return the scheduler's plan list, optionally scoped to a multiservice sub-service."""
    path = (
        "/v1/plans"
        if multiservice_name is None
        else "/v1/service/{}/plans".format(multiservice_name)
    )
    return sdk_cmd.service_request("GET", service_name, path, timeout_seconds=timeout_seconds).json()
def get_scheduler_metrics(service_name: str, timeout_seconds: int = 15 * 60) -> Dict[str, Any]:
    """Returns a dict tree of Scheduler metrics fetched directly from the scheduler.

    Returned data will match the content of /service/<svc_name>/v1/metrics.
    NOTE(review): timeout_seconds is accepted but unused by this implementation.
    """
    metrics = sdk_cmd.service_request("GET", service_name, "/v1/metrics").json()
    assert isinstance(metrics, dict)
    return metrics
def _dump_threads(item: pytest.Item, service_name: str) -> None:
    """Fetch the scheduler thread dump and store it as a per-test artifact."""
    response = sdk_cmd.service_request(
        "GET", service_name, "v1/debug/threads", timeout_seconds=5
    )
    threads = response.text
    out_path = _setup_artifact_path(item, "threads_{}.txt".format(service_name.replace("/", "_")))
    log.info("=> Writing {} ({} bytes)".format(out_path, len(threads)))
    with open(out_path, "w") as f:
        f.write(threads)
        f.write("\n")  # ... and a trailing newline
def get_pod_region(service_name, pod_name):
    """Return the 'offer_region' label value recorded for the given pod's first task."""
    info = sdk_cmd.service_request(
        'GET', service_name, '/v1/pod/{}/info'.format(pod_name)).json()[0]['info']
    labels = info['labels']['labels']
    regions = [label['value'] for label in labels if label['key'] == 'offer_region']
    return regions[0]
def wait_for_plan():
    """Poll closure: fetch the plan once (no retry), tolerating HTTP 417 (plan errors)."""
    response = sdk_cmd.service_request(
        'GET',
        service_name,
        '/v1/plans/{}'.format(plan),
        retry=False,
        raise_on_error=False)
    # 417 means the plan contains errors; return it as-is rather than raising.
    if response.status_code != 417:
        response.raise_for_status()
    return response
def fault_domain_vars_are_present(pod_instance):
    """Return True if the pod's task environment defines non-empty REGION and ZONE variables."""
    info = sdk_cmd.service_request(
        "GET", config.SERVICE_NAME, "/v1/pod/{}/info".format(pod_instance), log_response=False
    ).json()[0]["info"]
    variables = info["command"]["environment"]["variables"]
    # Fix: the previous code compared the matched variable dict (or a list default)
    # against a string, so the function always returned True. Compare the actual
    # variable values instead.
    region = next((var["value"] for var in variables if var["name"] == "REGION"), None)
    zone = next((var["value"] for var in variables if var["name"] == "ZONE"), None)
    return bool(region) and bool(zone)
def wait_for_plan():
    """Poll closure: fetch the plan, tolerating HTTP 417 (plan has errors)."""
    resp = sdk_cmd.service_request(
        'GET', service_name, '/v1/plans/{}'.format(plan), raise_on_error=False)
    if resp.status_code == 417:
        return resp  # avoid throwing, return plan with errors
    resp.raise_for_status()
    return resp
def run_job(service_name, job_name, timeout_seconds=SHORT_TIMEOUT_SECONDS, **kwargs):
    """Trigger a parameterized build of the given Jenkins job.

    kwargs are passed as build parameters on the buildWithParameters query string.
    """
    from urllib.parse import urlencode  # stdlib; local import matches file style
    # Fix: parameter values were previously joined into the query string without
    # URL encoding, which breaks on values containing spaces, '&', '=', etc.
    path = 'job/{}/buildWithParameters?{}'.format(job_name, urlencode(kwargs))
    return sdk_cmd.service_request('POST', service_name, path, timeout_seconds=timeout_seconds)
def create_taskstatuses_file(self):
    """Persist the scheduler's /v1/debug/taskStatuses payload as an artifact file."""
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v1/debug/taskStatuses", raise_on_error=False
    )
    if response.ok:
        self.write_file("service_v1_debug_taskStatuses.json", response.text)
    else:
        log.error(
            "Could not get scheduler task-statuses\nstatus_code: '%s'\nstderr: '%s'",
            response.status_code,
            response.text,
        )
def configuration_target_id(self) -> List[str]:
    """Return the scheduler's target configuration ID list.

    NOTE(review): on HTTP failure this logs and implicitly returns None despite
    the declared return type -- callers should be prepared for that.
    """
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v1/configurations/targetId", raise_on_error=False
    )
    if response.ok:
        return json.loads(response.text)
    log.error(
        "Could not get scheduler configuration target id\nstatus_code: '%s'\nstderr: '%s'",
        response.status_code,
        response.text,
    )
def _set_buildable(service_name, job_name, buildable, timeout_seconds=SHORT_TIMEOUT_SECONDS):
    """Enable or disable a Jenkins job via its enable/disable endpoint."""
    verb = 'enable' if buildable else 'disable'
    return sdk_cmd.service_request(
        'POST', service_name, 'job/{}/{}'.format(job_name, verb),
        timeout_seconds=timeout_seconds)
def create_v2_offers_file(self):
    """Persist the scheduler's /v2/debug/offers payload as an artifact file."""
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v2/debug/offers", raise_on_error=False
    )
    if response.ok:
        self.write_file("service_v2_debug_offers.json", response.text)
    else:
        log.error(
            "Could not get v2 scheduler offers\nstatus_code: '%s'\nstderr: '%s'",
            response.status_code,
            response.text,
        )
def configuration(self, configuration_id) -> dict:
    """Fetch a stored scheduler configuration by its ID.

    NOTE(review): implicitly returns None when the HTTP request fails.
    """
    response = sdk_cmd.service_request(
        "GET",
        self.service_name,
        "/v1/configurations/{}".format(configuration_id),
        raise_on_error=False,
    )
    if response.ok:
        return json.loads(response.text)
    log.error(
        "Could not get scheduler configuration with ID '%s'"
        "\nstatus_code: '%s'\nstderr: '%s'",
        configuration_id,
        response.status_code,
        response.text,
    )
def get_plan_once(service_name, plan, multiservice_name=None):
    """Fetch the named plan once (no retries).

    Returns the parsed JSON plan, or the raw response when the plan reports
    errors (HTTP 417).
    """
    path = (
        "/v1/plans/{}".format(plan)
        if multiservice_name is None
        else "/v1/service/{}/plans/{}".format(multiservice_name, plan)
    )
    response = sdk_cmd.service_request("GET", service_name, path, retry=False, raise_on_error=False)
    if response.status_code == 417:
        # Plan has errors: Avoid throwing an exception, return plan as-is.
        return response
    response.raise_for_status()
    return response.json()
def create_plans_file(self):
    """Persist the scheduler's /v1/debug/plans payload as an artifact file."""
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v1/debug/plans", raise_on_error=False
    )
    if response.ok:
        self.write_file("service_v1_debug_plans.json", response.text)
    else:
        log.error(
            "Could not get scheduler plans\nstatus_code: '%s'\nstderr: '%s'",
            response.status_code,
            response.text)
def configuration_target_id(self) -> List[str]:
    """Return the scheduler's target configuration ID list.

    NOTE(review): logs and implicitly returns None on HTTP failure, despite the
    declared return type.
    """
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v1/configurations/targetId", raise_on_error=False
    )
    if not response.ok:
        log.error(
            "Could not get scheduler configuration target id\nstatus_code: '%s'\nstderr: '%s'",
            response.status_code,
            response.text)
        return
    return json.loads(response.text)
def create_offers_file(self):
    """Persist the scheduler's legacy /v1/debug/offers page as an artifact file."""
    warnings.warn("The v1/debug/offers endpoint will be deprecated in favour of the newer "
                  "v2/debug/offers endpoint.", PendingDeprecationWarning)
    response = sdk_cmd.service_request(
        "GET", self.service_name, "/v1/debug/offers", raise_on_error=False
    )
    if response.ok:
        # v1 serves an HTML page, hence the .html artifact extension.
        self.write_file("service_v1_debug_offers.html", response.text)
    else:
        log.error(
            "Could not get scheduler offers\nstatus_code: '%s'\nstderr: '%s'",
            response.status_code,
            response.text,
        )
def run_job( job_dict: Dict[str, Any], timeout_seconds: int = 600, raise_on_failure: bool = True, ) -> str: job_name = job_dict["id"] # Start job run, get run ID to poll against: run_id = sdk_cmd.service_request( "POST", "metronome", "/v1/jobs/{}/runs".format(job_name), log_args=False ).json()["id"] assert isinstance(run_id, str) log.info("Started job {}: run id {}".format(job_name, run_id)) # Wait for run to succeed, throw if run fails: @retrying.retry( wait_fixed=1000, stop_max_delay=timeout_seconds * 1000, retry_on_result=lambda res: not res ) def wait() -> bool: # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that # only works for active runs -- for whatever reason the run will disappear after it's done. # Therefore we have to query the full run history from the parent job and find our run_id there. run_history = sdk_cmd.service_request( "GET", "metronome", "/v1/jobs/{}".format(job_name), retry=False, params={"embed": "history"}, ).json()["history"] successful_run_ids = [run["id"] for run in run_history["successfulFinishedRuns"]] failed_run_ids = [run["id"] for run in run_history["failedFinishedRuns"]] log.info( "Job {} run history (waiting for successful {}): successful={} failed={}".format( job_name, run_id, successful_run_ids, failed_run_ids ) ) # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails. # Instead it will just keep restarting automatically until it succeeds or is deleted. if raise_on_failure and run_id in failed_run_ids: raise Exception("Job {} with id {} has failed, exiting early".format(job_name, run_id)) return run_id in successful_run_ids wait() return run_id
def run_job(job_dict, timeout_seconds=600, raise_on_failure=True): job_name = job_dict['id'] # Start job run, get run ID to poll against: run_id = sdk_cmd.service_request('POST', 'metronome', '/v1/jobs/{}/runs'.format(job_name), log_args=False).json()['id'] log.info('Started job {}: run id {}'.format(job_name, run_id)) # Wait for run to succeed, throw if run fails: @retrying.retry( wait_fixed=1000, stop_max_delay=timeout_seconds*1000, retry_on_result=lambda res: not res) def wait(): # Note: We COULD directly query the run here via /v1/jobs/<job_name>/runs/<run_id>, but that # only works for active runs -- for whatever reason the run will disappear after it's done. # Therefore we have to query the full run history from the parent job and find our run_id there. run_history = sdk_cmd.service_request( 'GET', 'metronome', '/v1/jobs/{}'.format(job_name), retry=False, params={'embed': 'history'}).json()['history'] successful_run_ids = [run['id'] for run in run_history['successfulFinishedRuns']] failed_run_ids = [run['id'] for run in run_history['failedFinishedRuns']] log.info('Job {} run history (waiting for successful {}): successful={} failed={}'.format( job_name, run_id, successful_run_ids, failed_run_ids)) # Note: If a job has restart.policy=ON_FAILURE, it won't show up in failed_run_ids even when it fails. # Instead it will just keep restarting automatically until it succeeds or is deleted. if raise_on_failure and run_id in failed_run_ids: raise Exception('Job {} with id {} has failed, exiting early'.format(job_name, run_id)) return run_id in successful_run_ids wait() return run_id
def test_cassandra_migration():
    """Backs up data from one Cassandra cluster to S3, then restores it into a
    second cluster and verifies the restored data.

    NOTE(review): scope of the two 'with' blocks reconstructed from the collapsed
    source -- confirm against the original formatting.
    """
    backup_service_name = os.getenv('CASSANDRA_BACKUP_CLUSTER_NAME')
    restore_service_name = os.getenv('CASSANDRA_RESTORE_CLUSTER_NAME')

    # jobs that exercise the backup cluster before/after the S3 backup:
    backup_node_address = os.getenv('BACKUP_NODE_ADDRESS', config.DEFAULT_NODE_ADDRESS)
    backup_node_port = os.getenv('BACKUP_NODE_PORT', config.DEFAULT_NODE_PORT)
    backup_write_data_job = config.get_write_data_job(backup_node_address, backup_node_port)
    backup_verify_data_job = config.get_verify_data_job(backup_node_address, backup_node_port)
    backup_delete_data_job = config.get_delete_data_job(backup_node_address, backup_node_port)
    backup_verify_deletion_job = config.get_verify_deletion_job(backup_node_address, backup_node_port)

    # parameters shared by the backup and restore-s3 plans:
    plan_parameters = {
        'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
        'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
        'SNAPSHOT_NAME': str(uuid.uuid1()),
        'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
    }

    backup_install_job_context = sdk_jobs.InstallJobContext(
        [backup_write_data_job, backup_verify_data_job,
         backup_delete_data_job, backup_verify_deletion_job])
    backup_run_job_context = sdk_jobs.RunJobContext(
        before_jobs=[backup_write_data_job, backup_verify_data_job],
        after_jobs=[backup_delete_data_job, backup_verify_deletion_job])
    # Install and run the write/delete data jobs against backup cluster,
    # running dcos-cassandra-service
    with backup_install_job_context, backup_run_job_context:
        # Back this cluster up to S3
        backup_parameters = {
            'backup_name': plan_parameters['SNAPSHOT_NAME'],
            's3_access_key': plan_parameters['AWS_ACCESS_KEY_ID'],
            's3_secret_key': plan_parameters['AWS_SECRET_ACCESS_KEY'],
            'external_location': 's3://{}'.format(plan_parameters['S3_BUCKET_NAME']),
        }
        sdk_cmd.service_request('PUT', backup_service_name, '/v1/backup/start', json=backup_parameters)
        sdk_plan.wait_for_completed_deployment(backup_service_name)

    # Restore data to second instance:
    restore_node_address = os.getenv(
        'RESTORE_NODE_ADDRESS', sdk_hosts.autoip_host('sdk-cassandra', 'node-0-server'))
    restore_node_port = os.getenv('RESTORE_NODE_PORT', '9052')
    restore_write_data_job = config.get_write_data_job(restore_node_address, restore_node_port)
    restore_verify_data_job = config.get_verify_data_job(restore_node_address, restore_node_port)
    restore_delete_data_job = config.get_delete_data_job(restore_node_address, restore_node_port)
    restore_verify_deletion_job = config.get_verify_deletion_job(restore_node_address, restore_node_port)

    restore_install_job_context = sdk_jobs.InstallJobContext(
        [restore_write_data_job, restore_verify_data_job,
         restore_delete_data_job, restore_verify_deletion_job]
    )
    restore_run_job_context = sdk_jobs.RunJobContext(
        after_jobs=[restore_verify_data_job, restore_delete_data_job, restore_verify_deletion_job]
    )
    with restore_install_job_context, restore_run_job_context:
        sdk_plan.start_plan(restore_service_name, 'restore-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(restore_service_name, 'restore-s3')
def get_pod_region(service_name, pod_name):
    """Return the 'offer_region' label value from the pod's first task info."""
    pod_info = sdk_cmd.service_request(
        'GET', service_name, '/v1/pod/{}/info'.format(pod_name)
    ).json()
    info = pod_info[0]['info']
    matches = [label['value'] for label in info['labels']['labels']
               if label['key'] == 'offer_region']
    return matches[0]
def get_scheduler_metrics(service_name, timeout_seconds=15 * 60):
    """Returns a dict tree of Scheduler metrics fetched directly from the scheduler.
    Returned data will match the content of /service/<svc_name>/v1/metrics.

    NOTE(review): timeout_seconds is accepted but unused by this implementation.
    """
    response = sdk_cmd.service_request('GET', service_name, '/v1/metrics')
    return response.json()
def start_plan(service_name, plan, parameters=None):
    """Kick off the named plan, passing any provided plan parameters in the body."""
    body = {} if parameters is None else parameters
    sdk_cmd.service_request(
        'POST', service_name, '/v1/plans/{}/start'.format(plan), json=body)
def test_task_dns_prefix_points_to_all_tasks():
    """Check that every hello-0 task advertises the shared DNS discovery name."""
    pod_info = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/hello-0/info').json()
    # Assert that DiscoveryInfo is correctly set on tasks.
    for entry in pod_info:
        assert entry["info"]["discovery"]["name"] == "hello-0"
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def list_plans(service_name, timeout_seconds=TIMEOUT_SECONDS):
    """Return the scheduler's list of plans as parsed JSON."""
    response = sdk_cmd.service_request(
        'GET', service_name, '/v1/plans', timeout_seconds=timeout_seconds)
    return response.json()