def new_default_version_available(prev_version):
    """Wait until the reported package version differs from ``prev_version``.

    The check is driven by ``spin`` (defined elsewhere in this file), which
    is handed a fetch callable and a predicate returning a
    ``(success, failure_message)`` tuple.

    Args:
        prev_version: The package version observed before the change.
    """
    def fetch_version():
        # The fetched version must be returned; without the return the
        # predicate would always compare None against prev_version.
        return get_pkg_version()

    def version_changed(pkg_version):
        return pkg_version != prev_version, 'Package version has not changed'

    spin(fetch_version, version_changed)
def destroy_service():
    """Delete the package's Marathon app and wait until the service is gone.

    Issues a DELETE against the Marathon ``apps`` endpoint for
    ``PACKAGE_NAME`` and then polls ``shakedown.get_service`` through
    ``spin`` until it reports no service.
    """
    destroy_endpoint = marathon_api_url_with_param('apps', PACKAGE_NAME)
    request(dcos.http.delete, destroy_endpoint)

    # Make sure the scheduler has been destroyed
    def fetch_service():
        # Return the lookup result so the predicate inspects the real
        # service state rather than an implicit None.
        return shakedown.get_service(PACKAGE_NAME)

    def service_gone(service):
        return service is None, 'Service not destroyed'

    spin(fetch_service, service_gone)
def test_node_is_replaced():
    """Replace ``node-0`` and verify that it rejoins the cluster.

    Steps:
      1. Wait for the current plan to be complete.
      2. Issue ``node replace node-0`` and check the returned task id.
      3. Wait for the plan to be complete again with three "Deploy" steps.
      4. Wait until the ``node-0`` task id differs from the replaced one.
      5. Run ``nodetool status`` on the agent hosting ``node-1`` and wait
         until ``DEFAULT_NODE_COUNT`` lines start with ``UN``.
    """
    infinity_commons.get_and_verify_plan(lambda p: p['status'] == 'COMPLETE')

    replaced_node_task_id = get_cassandra_command('node replace node-0')[0]
    assert 'node-0' in replaced_node_task_id

    plan = infinity_commons.get_and_verify_plan(
        lambda p: (
            p['status'] == infinity_commons.PlanState.COMPLETE.value and
            len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 3
        )
    )
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.COMPLETE.value

    # Check that we've created a new task with a new id, waiting until a new
    # one comes up.
    def get_node0_task_id():
        return get_first(
            get_dcos_command('task --json'),
            lambda t: t['name'] == 'node-0'
        )['id']

    def task_id_changed(task_id):
        return (
            task_id != replaced_node_task_id,
            'Task ID for replaced node did not change'
        )

    spin(get_node0_task_id, task_id_changed)

    # Check cluster status with nodetool to assure that the new node has
    # rejoined the cluster and the old node has been removed, waiting until
    # it's running (status "UN").
    def get_nodetool_status():
        node1_task = get_first(
            get_dcos_command('task --json'),
            lambda t: t['name'] == 'node-1'
        )
        node1_host = get_first(
            node1_task['labels'],
            lambda label: label['key'] == 'offer_hostname'
        )['value']
        return shakedown.run_command_on_agent(
            node1_host,
            "docker run -t --net=host pitrho/cassandra-nodetool nodetool -p 7199 status"
        )

    def all_nodes_up(result):
        command_succeeded, output = result
        # Keep the short-circuit: the output is only inspected when the
        # command itself succeeded.
        succeeded = (
            command_succeeded and
            sum(1 for line in output.split('\n') if line.startswith('UN'))
            == DEFAULT_NODE_COUNT
        )
        return succeeded, 'Node did not successfully rejoin cluster'

    spin(get_nodetool_status, all_nodes_up)
def check_scheduler_health():
    """Wait until the scheduler lists ``DEFAULT_NODE_COUNT`` nodes.

    A ``RuntimeError`` from the ``node list`` command is treated as an empty
    node list, so the check simply reports failure for that attempt.
    """
    # Make sure scheduler endpoint is responding and all nodes are available
    def list_nodes():
        try:
            nodes = get_cassandra_command('node list')
        except RuntimeError:
            nodes = []
        return nodes

    def all_nodes_listed(nodes):
        is_healthy = len(nodes) == DEFAULT_NODE_COUNT
        return is_healthy, 'Scheduler and all nodes not available'

    spin(list_nodes, all_nodes_listed)
def get_and_verify_plan(predicate=lambda r: True):
    """Fetch the service plan via ``spin`` and return its decoded JSON body.

    Args:
        predicate: Callable applied to the decoded plan; its result is the
            success value handed back to ``spin``. Defaults to accepting
            any plan.

    Returns:
        The JSON-decoded body of the response returned by ``spin``.

    Side effects:
        Updates the module-level ``counter``: it is bumped (up to 3) on each
        decodable response and reset to 0 when ``predicate`` is satisfied.
    """
    global counter
    plan_url = cassandra_api_url('plan')
    failure_message = 'Request to {} failed'.format(plan_url)

    def fetch_plan():
        # An HTTP error still carries a response object; hand that back so
        # the predicate can decide what to do with it.
        try:
            response = dcos.http.get(plan_url)
        except dcos.errors.DCOSHTTPException as err:
            response = err.response
        return response

    def plan_satisfies(response):
        global counter
        try:
            plan = response.json()
        except ValueError:
            return False, failure_message

        if counter < 3:
            counter += 1

        verdict = predicate(plan)
        if verdict:
            counter = 0
        return verdict, failure_message

    final_response = spin(fetch_plan, plan_satisfies)
    return final_response.json()
def get_and_verify_plan(predicate=lambda r: True, wait_time=WAIT_TIME_IN_SECONDS):
    """Fetch the service plan via ``spin`` and return its decoded JSON body.

    Args:
        predicate: Callable applied to the decoded plan; its result is the
            success value handed back to ``spin``. Defaults to accepting
            any plan.
        wait_time: Forwarded to ``spin`` as its ``wait_time`` keyword.

    Returns:
        The JSON-decoded body of the response returned by ``spin``.

    Side effects:
        Updates the module-level ``counter``: it is bumped (up to 3) on each
        decodable response and reset to 0 when ``predicate`` is satisfied.
    """
    global counter
    plan_url = cassandra_api_url('plan')
    failure_message = 'Request to {} failed'.format(plan_url)

    def fetch_plan():
        # An HTTP error still carries a response object; hand that back so
        # the predicate can decide what to do with it.
        try:
            response = dcos.http.get(plan_url)
        except dcos.errors.DCOSHTTPException as err:
            response = err.response
        return response

    def plan_satisfies(response):
        global counter
        try:
            plan = response.json()
        except ValueError:
            return False, failure_message

        if counter < 3:
            counter += 1

        verdict = predicate(plan)
        if verdict:
            counter = 0
        return verdict, failure_message

    final_response = spin(fetch_plan, plan_satisfies, wait_time=wait_time)
    return final_response.json()
def get_and_verify_plan(predicate=lambda r: True, assert_success=True):
    """Fetch the deploy plan via ``spin`` and return its decoded JSON body.

    Args:
        predicate: Callable applied to the decoded plan; its result is the
            success value handed back to ``spin``. Defaults to accepting
            any plan.
        assert_success: Forwarded to ``spin`` as its ``assert_success``
            keyword.

    Returns:
        The JSON-decoded body of the response returned by ``spin``.

    Side effects:
        Prints progress lines and updates the module-level ``counter``: it
        is bumped (up to 3) on each decodable response and reset to 0 when
        ``predicate`` is satisfied.
    """
    print("mds inside get_and_verify_plan")
    global counter

    def fetch_plan():
        # Named plan_url rather than `str` so the builtin is not shadowed.
        plan_url = cassandra_api_url('plans/deploy')
        print("get_and_verify_plan: " + plan_url)
        return dcos.http.get(plan_url, is_success=request_success)

    def plan_satisfies(result):
        global counter
        message = 'Request to /plan failed'
        try:
            body = result.json()
        except Exception:
            return False, message

        if counter < 3:
            counter += 1

        # Evaluate the predicate exactly once so the value used to reset the
        # counter is the same value reported back to spin.
        verdict = predicate(body)
        if verdict:
            counter = 0
        return verdict, message

    return spin(
        fetch_plan,
        plan_satisfies,
        wait_time=HEALTH_WAIT_TIME,
        assert_success=assert_success
    ).json()
def _block_on_adminrouter(master_ip):
    """Wait until ``http://<master_ip>/metadata`` reports ``master_ip``.

    The metadata endpoint is queried with a DC/OS ACS token. The master
    counts as healthy when the ``PUBLIC_IPV4`` field of the JSON response
    equals ``master_ip``.

    Args:
        master_ip: Address of the master to query and to expect in the
            response.
    """
    auth_headers = {
        'Authorization': "token={}".format(shakedown.dcos_acs_token())
    }
    metadata_url = "http://{}/metadata".format(master_ip)

    def fetch_metadata():
        return requests.get(metadata_url, headers=auth_headers)

    def master_is_healthy(response):
        # Any failure while decoding or reading the field counts as
        # "not ready yet" instead of aborting the wait.
        try:
            reported_ip = response.json()['PUBLIC_IPV4']
            return reported_ip == master_ip, "Master is not healthy yet"
        except Exception:
            return False, "Failed to parse json"

    spin(fetch_metadata, master_is_healthy, HEALTH_WAIT_TIME)
    log.info("Master is up again. Master IP: {}".format(master_ip))
def get_node_host():
    """Return one of the service's node IPs once all nodes are reported.

    Polls ``shakedown.get_service_ips`` through ``spin`` until it yields
    ``DEFAULT_NODE_COUNT`` entries, then pops one entry from that result.
    An ``IndexError`` from the lookup is treated as an empty set.
    """
    def fetch_service_ips():
        try:
            ips = shakedown.get_service_ips(PACKAGE_NAME)
        except IndexError:
            ips = set()
        return ips

    def all_nodes_reported(ips):
        return len(ips) == DEFAULT_NODE_COUNT, 'Nodes failed to return'

    service_ips = spin(fetch_service_ips, all_nodes_reported)
    return service_ips.pop()
def _block_on_adminrouter():
    """Wait until ``shakedown.master_ip`` yields a truthy master IP.

    Prints a progress line before waiting and the resolved IP afterwards.
    """
    def fetch_master_ip():
        return shakedown.master_ip()

    def master_ip_known(candidate):
        # The IP itself doubles as the success flag: falsy means "not yet".
        return candidate, "Failed to fetch master ip"

    # wait for adminrouter to recover
    print("Ensuring adminrouter is up...")
    master_ip = spin(fetch_master_ip, master_ip_known)
    print("Adminrouter is up. Master IP: {}".format(master_ip))
def verify_leader_changed(old_leader_ip):
    """Wait until the master leader IP differs from ``old_leader_ip``.

    Args:
        old_leader_ip: Leader address observed before the expected change.
    """
    def current_leader_ip():
        # While authentication fails, report the old leader so the check
        # keeps reading as "not changed yet".
        try:
            leader_ip = shakedown.master_leader_ip()
        except DCOSAuthenticationException:
            print("Got exception while fetching leader")
            leader_ip = old_leader_ip
        return leader_ip

    def leader_differs(candidate_ip):
        return old_leader_ip != candidate_ip, "Leader has not changed"

    new_leader_ip = spin(current_leader_ip, leader_differs)
    print("Leader has changed to {}".format(new_leader_ip))
def get_and_verify_plan(predicate=lambda r: True):
    """Fetch the service plan via ``spin`` and return its decoded JSON body.

    Args:
        predicate: Callable applied to the decoded plan; its result is the
            success value handed back to ``spin``. Defaults to accepting
            any plan.

    Returns:
        The JSON-decoded body of the response returned by ``spin``.

    Side effects:
        Updates the module-level ``counter``: it is bumped (up to 3) on each
        decodable response and reset to 0 when ``predicate`` is satisfied.
    """
    global counter

    def fetch_plan():
        return dcos.http.get(cassandra_api_url('plan'))

    def plan_satisfies(result):
        global counter
        message = 'Request to /plan failed'
        try:
            body = result.json()
        except Exception:
            # Still best-effort for any decoding failure, but no longer
            # swallows KeyboardInterrupt/SystemExit like a bare except.
            return False, message

        if counter < 3:
            counter += 1

        # Evaluate the predicate exactly once so the value used to reset the
        # counter is the same value reported back to spin.
        verdict = predicate(body)
        if verdict:
            counter = 0
        return verdict, message

    return spin(fetch_plan, plan_satisfies).json()
def verify_leader_changed(old_leader_ip):
    """Wait until the master leader IP differs from ``old_leader_ip``.

    Logs the name of this function on entry and the resulting leader IP
    once ``spin`` returns.

    Args:
        old_leader_ip: Leader address observed before the expected change.
    """
    log.info(sys._getframe().f_code.co_name)

    def current_leader_ip():
        # While authentication fails, report the old leader so the check
        # keeps reading as "not changed yet".
        try:
            return shakedown.master_leader_ip()
        except DCOSAuthenticationException:
            log.error("Got exception while fetching leader")
            return old_leader_ip

    def leader_differs(new_leader_ip):
        # Succeed only when the leader actually changed; the second element
        # is the message used when it has not.
        return old_leader_ip != new_leader_ip, "Leader has not changed"

    result = spin(current_leader_ip, leader_differs)
    log.info("Leader IP {}".format(result))
def get_and_verify_plan(predicate=lambda r: True):
    """Fetch the service plan via ``spin`` and return its decoded JSON body.

    The plan is requested with ``requests.get`` using the headers from
    ``request_headers()``.

    Args:
        predicate: Callable applied to the decoded plan; its result is the
            success value handed back to ``spin``. Defaults to accepting
            any plan.

    Returns:
        The JSON-decoded body of the response returned by ``spin``.

    Side effects:
        Updates the module-level ``counter``: it is bumped (up to 3) on each
        decodable response and reset to 0 when ``predicate`` is satisfied.
    """
    global counter

    def fetch_plan():
        return requests.get(
            cassandra_api_url('plan'),
            headers=request_headers()
        )

    def plan_satisfies(result):
        global counter
        message = 'Request to /plan failed'
        try:
            body = result.json()
        except ValueError:
            # ValueError is the common base of the JSON decode errors that
            # Response.json() can raise (json.JSONDecodeError included), so
            # an undecodable body is retried whichever JSON backend is used.
            return False, message

        if counter < 3:
            counter += 1

        # Evaluate the predicate exactly once so the value used to reset the
        # counter is the same value reported back to spin.
        verdict = predicate(body)
        if verdict:
            counter = 0
        return verdict, message

    return spin(fetch_plan, plan_satisfies).json()