def test_storing_a_context_on_s3_and_retrieving_it_from_a_new_client(self):
    stackname = 'dummy1--prod'
    context = cfngen.build_context('dummy1', stackname=stackname)
    context_handler.write_context(stackname, context)

    expected = context_handler.load_context(stackname)
    remove(context_handler.local_context_file(stackname))
    downloaded = context_handler.load_context(stackname)

    self.assertEqual(expected, downloaded)
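# The test above exercises load_context's S3 fallback: with the local copy
# removed, a second load must re-download the context and produce an equal
# dict. A minimal sketch of that load-with-fallback shape, assuming contexts
# are stored as JSON under a local cache dir and a 'contexts/' S3 prefix
# (all names and locations here are illustrative, not builder's actual
# implementation):
import json
import os

import boto3

CONTEXT_DIR = '.cfn/contexts' # assumed local cache location
CONTEXT_BUCKET = 'example-builder-contexts' # assumed bucket name

def load_context_sketch(stackname):
    "returns the stack's context dict, fetching it from S3 when no local copy exists."
    path = os.path.join(CONTEXT_DIR, stackname + '.json')
    if not os.path.exists(path):
        boto3.client('s3').download_file(CONTEXT_BUCKET, 'contexts/%s.json' % stackname, path)
    with open(path) as fh:
        return json.load(fh)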
def _check_want_to_be_running(stackname, autostart=False):
    try:
        context = context_handler.load_context(stackname)
        if 'ec2' in context:
            # early check can only be made if the instance actually declares
            # ec2 True/False in its context.
            # otherwise, don't make assumptions and go ahead
            if not context['ec2']:
                return False
    except context_handler.MissingContextFile as e:
        LOG.warning(e)

    instance_list = core.find_ec2_instances(stackname, allow_empty=True)
    num_instances = len(instance_list)
    if num_instances >= 1:
        return instance_list

    if not autostart:
        should_start = utils._pick('should_start', [True, False], message='Stack not running. Should it be started?')
        if not should_start:
            return False

    core_lifecycle.start(stackname)
    # another call, to get the ip addresses that are assigned to the
    # now-running instances and that weren't there before
    return core.find_ec2_instances(stackname)
def remaster(stackname, new_master_stackname):
    "tell minion who their new master is. deletes any existing master key on minion"
    # TODO: turn this into a decorator
    import cfn
    # start the machine if it's stopped
    # you might also want to acquire a lock so alfred doesn't stop things
    cfn._check_want_to_be_running(stackname, autostart=True)

    master_ip = _cached_master_ip(new_master_stackname)
    LOG.info('re-mastering %s to %s', stackname, master_ip)

    context = context_handler.load_context(stackname)

    # remove if no longer an issue
    # if context.get('ec2') == True:
    #     # TODO: duplicates bad ec2 data wrangling in cfngen.build_context
    #     # ec2 == True for some reason, which is completely useless
    #     LOG.warn("bad context for stack: %s", stackname)
    #     context['ec2'] = {}
    #     context['project']['aws']['ec2'] = {}

    if not context.get('ec2'):
        LOG.info("no ec2 context, skipping %s", stackname)
        return

    if context['ec2'].get('master_ip') == master_ip:
        LOG.info("already remastered: %s", stackname)
        try:
            utils.confirm("Skip?") # Ctrl-C while confirming means "don't skip"
            return
        except KeyboardInterrupt:
            LOG.info("not skipping")

    LOG.info("upgrading salt client")
    pdata = core.project_data_for_stackname(stackname)
    context['project']['salt'] = pdata['salt']

    LOG.info("setting new master address")
    cfngen.set_master_address(pdata, context, master_ip) # mutates context

    LOG.info("updating context")
    context_handler.write_context(stackname, context)

    LOG.info("updating buildvars")
    buildvars.refresh(stackname, context)

    # remove knowledge of the old master
    def work():
        remote_sudo("rm -f /etc/salt/pki/minion/minion_master.pub") # destroy the old master key we have

    LOG.info("removing old master key from minion")
    core.stack_all_ec2_nodes(stackname, work, username=config.BOOTSTRAP_USER)

    LOG.info("updating nodes")
    bootstrap.update_ec2_stack(stackname, context, concurrency='serial')
    return True
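# The TODO above asks for the start-if-stopped step to become a decorator.
# One plausible shape (a sketch only; 'requires_running_stack' is a
# hypothetical name, not part of the codebase):
from functools import wraps

def requires_running_stack(fn):
    "starts the stack, if necessary, before calling the wrapped task."
    @wraps(fn)
    def wrapper(stackname, *args, **kwargs):
        import cfn
        cfn._check_want_to_be_running(stackname, autostart=True)
        return fn(stackname, *args, **kwargs)
    return wrapper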
def load_balancer_register_all__v1(stackname):
    context = context_handler.load_context(stackname)
    elb_name = cloudformation.read_output(stackname, 'ElasticLoadBalancer')
    executor = bluegreen.BlueGreenConcurrency(context['aws']['region'])
    node_params = all_node_params(stackname)
    LOG.info("Register all: %s", pformat(node_params))
    executor.register(elb_name, node_params)
    executor.wait_registered_all(elb_name, node_params)
def remaster(stackname, new_master_stackname):
    "tell minion who their new master is. deletes any existing master key on minion"
    # TODO: turn this into a decorator
    import cfn
    # start the machine if it's stopped
    # you might also want to acquire a lock so alfred doesn't stop things
    cfn._check_want_to_be_running(stackname, autostart=True)

    master_ip = _cached_master_ip(new_master_stackname)
    LOG.info('re-mastering %s to %s', stackname, master_ip)

    context = context_handler.load_context(stackname)

    # remove if no longer an issue
    # if context.get('ec2') == True:
    #     # TODO: duplicates bad ec2 data wrangling in cfngen.build_context
    #     # ec2 == True for some reason, which is completely useless
    #     LOG.warn("bad context for stack: %s", stackname)
    #     context['ec2'] = {}
    #     context['project']['aws']['ec2'] = {}

    if not context.get('ec2'):
        LOG.info("no ec2 context, skipping %s", stackname)
        return

    if context['ec2'].get('master_ip') == master_ip:
        LOG.info("already remastered: %s", stackname)
        try:
            utils.confirm("Skip?") # Ctrl-C while confirming means "don't skip"
            return
        except KeyboardInterrupt:
            LOG.info("not skipping")

    LOG.info("upgrading salt client")
    pdata = core.project_data_for_stackname(stackname)
    context['project']['salt'] = pdata['salt']

    LOG.info("setting new master address")
    cfngen.set_master_address(pdata, context, master_ip) # mutates context

    LOG.info("updating context")
    context_handler.write_context(stackname, context)

    LOG.info("updating buildvars")
    buildvars.refresh(stackname, context)

    # remove knowledge of the old master
    def work():
        sudo("rm -f /etc/salt/pki/minion/minion_master.pub") # destroy the old master key we have

    LOG.info("removing old master key from minion")
    core.stack_all_ec2_nodes(stackname, work, username=config.BOOTSTRAP_USER)

    LOG.info("updating nodes")
    bootstrap.update_ec2_stack(stackname, context, concurrency='serial')
    return True
def load_balancer_status(stackname):
    context = context_handler.load_context(stackname)
    # TODO: delegate to BlueGreenConcurrency?
    elb_name = cloudformation.read_output(stackname, 'ElasticLoadBalancer')
    conn = boto_client('elb', context['aws']['region'])
    health = conn.describe_instance_health(
        LoadBalancerName=elb_name,
    )['InstanceStates']
    LOG.info("Load balancer name: %s", elb_name)
    LOG.info("Health: %s", pformat(health))
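# For reference, 'InstanceStates' in the classic ELB describe_instance_health
# response is a list of dicts shaped roughly like this (values illustrative):
#
#   [{'InstanceId': 'i-0abc...',
#     'State': 'InService', # or 'OutOfService', 'Unknown'
#     'ReasonCode': 'N/A',
#     'Description': 'N/A'}]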
def _fix_single_ec2_node(stackname):
    LOG.info("checking build vars on node %s", current_node_id())
    try:
        buildvars = _retrieve_build_vars()
        LOG.info("valid bvars found, no fix necessary: %s", buildvars)
    except AssertionError:
        LOG.info("invalid build vars found, regenerating from context")
        context = load_context(stackname)
        # some contexts are missing stackname
        context['stackname'] = stackname
        node_id = current_node_id()
        new_vars = trop.build_vars(context, node_id)
        _update_remote_bvars(stackname, new_vars)
def set_versions(stackname, *repolist):
    """updates the cloned formulas on a masterless stack to a specific revision.

    call with formula name and a revision, like:
        builder-private@ab87af78asdf2321431f31"""
    context = context_handler.load_context(stackname)
    fkeys = ['formula-repo', 'formula-dependencies', 'private-repo', 'configuration-repo']
    fdata = subdict(context['project'], fkeys)
    repolist = parse_validate_repolist(fdata, *repolist)

    if not repolist:
        return 'nothing to do'

    def updater():
        for repo, formula, revision in repolist:
            bootstrap.run_script('update-masterless-formula.sh', repo, formula, revision)

    core.stack_all_ec2_nodes(stackname, updater, concurrency='serial')
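# example invocation, following the docstring (the stack name is
# illustrative; the revision is the docstring's own example):
#
#   set_versions('project--test', 'builder-private@ab87af78asdf2321431f31')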
def _create_generic_stack(stackname, parameters=None, on_start=_noop, on_error=_noop):
    "simply creates the stack of resources on AWS, talking to CloudFormation."
    if not parameters:
        parameters = []
    LOG.info('creating stack %r', stackname)
    stack_body = core.stack_json(stackname)
    try:
        on_start()
        conn = connect_aws_with_stack(stackname, 'cfn')
        conn.create_stack(stackname, stack_body, parameters=parameters)
        _wait_until_in_progress(stackname)
        context = context_handler.load_context(stackname)
        # setup various resources after creation, where necessary
        setup_ec2(stackname, context['ec2'])
        return True
    except StackTakingALongTimeToComplete as err:
        LOG.info("Stack taking a long time to complete: %s", err.message)
        raise
    except BotoServerError as err:
        if err.message.endswith(' already exists'):
            LOG.debug(err.message)
            return False
        LOG.exception("unhandled Boto exception attempting to create stack", extra={'stackname': stackname, 'parameters': parameters})
        on_error()
        raise
    except KeyboardInterrupt:
        LOG.debug("caught keyboard interrupt, cancelling...")
        return False
    except BaseException:
        LOG.exception("unhandled exception attempting to create stack", extra={'stackname': stackname})
        on_error()
        raise
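# '_noop' is not defined in this excerpt; given its use as a default
# callback, it is presumably a do-nothing function along these lines
# (an assumption, not the project's actual definition):
def _noop():
    pass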
def _check_want_to_be_running(stackname, autostart=False):
    try:
        context = context_handler.load_context(stackname)
        if not _are_there_existing_servers(context):
            return False
    except context_handler.MissingContextFile as e:
        LOG.warning(e)

    instance_list = core.find_ec2_instances(stackname, allow_empty=True)
    num_instances = len(instance_list)
    if num_instances >= 1:
        return instance_list

    if not autostart:
        should_start = utils._pick('should_start', [True, False], message='Stack not running. Should it be started?')
        if not should_start:
            return False

    core_lifecycle.start(stackname)
    # another call to get the ip addresses that are assigned to the
    # now-running instances and that weren't there before
    return core.find_ec2_instances(stackname)
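# _are_there_existing_servers isn't shown in this excerpt. Judging from the
# earlier variant of _check_want_to_be_running, it wraps the same early 'ec2'
# check; a plausible sketch (an assumption, not the project's definition):
def _are_there_existing_servers_sketch(context):
    # an early answer is only possible if the context actually declares
    # ec2 True/False; otherwise don't make assumptions and carry on
    if 'ec2' not in context:
        return True
    return bool(context['ec2'])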
def refresh(stackname, context=None):
    "(safely) replaces the buildvars file on the ec2 instance(s)"

    context = context or load_context(stackname)

    def _refresh_buildvars():
        old_buildvars = _retrieve_build_vars()

        node = old_buildvars.get('node')
        if not node or not str(node).isdigit():
            # (very) old buildvars. try parsing 'nodename'
            nodename = old_buildvars.get('nodename')
            if nodename: # ll: "elife-dashboard--prod--1"
                node = nodename.split('--')[-1]
                if not node.isdigit():
                    LOG.warning("nodename ends in a non-digit node: %s", nodename)
                    node = None

            if not node:
                # no 'node' and no (valid) 'nodename' present.
                # assume this stack was created before nodes were a thing
                # and that there is only 1 in the 'cluster'
                node = 1

        new_buildvars = trop.build_vars(context, int(node))
        new_buildvars['revision'] = old_buildvars.get('revision') # TODO: is this still necessary?
        _update_remote_bvars(stackname, new_buildvars)

    # lsh@2019-06: cfn.update_infrastructure fails to run highstate on new (unvisited? not the
    # instance author?) ec2 instances if the keypair is not present: it prompts for a password
    # for the deploy user, and prompts during parallel execution cause the operation to fail.
    keypair.download_from_s3(stackname, die_if_exists=False)

    stack_all_ec2_nodes(stackname, _refresh_buildvars, username=BOOTSTRAP_USER)
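# the 'nodename' fallback above, worked through: the node number is whatever
# follows the last '--' separator, so
#
#   "elife-dashboard--prod--1".split('--')[-1] # => '1'
#
# and int('1') == 1 becomes the node passed to trop.build_vars.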
def repair_context(stackname):
    # triggers the workaround of downloading it from EC2 and persisting it
    load_context(stackname)
def wrapper(stackname=None, *args, **kwargs):
    ctx = context_handler.load_context(stackname)
    ensure(stackname and ctx['ec2']['masterless'], "this command requires a masterless instance.")
    return fn(stackname, *args, **kwargs)
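# 'wrapper' closes over 'fn', so it is the inner function of a decorator.
# The likely enclosing shape, with an assumed name ('requires_masterless')
# and the body elided to the code above:
from functools import wraps

def requires_masterless(fn):
    @wraps(fn)
    def wrapper(stackname=None, *args, **kwargs):
        ... # body as in the function above
    return wrapper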