def respawn_elastigroup(elastigroup_id: str, stack_name: str, region: str, batch_size: int):
    '''
    Respawn all instances in the ElastiGroup.

    Starts a deployment through ``elastigroup_api.deploy`` (grace period 600)
    and then polls ``elastigroup_api.deploy_status`` every two seconds until
    every returned deploy is finished. A deploy counts as finished once its
    progress value reaches 100 or its status is listed in
    ``ELASTIGROUP_TERMINATED_DEPLOY_STATUS``.

    :param elastigroup_id: ID of the ElastiGroup to redeploy.
    :param stack_name: stack name; used for the Spotinst account lookup and in log output.
    :param region: region used for the Spotinst account lookup.
    :param batch_size: batch size handed to the deploy call; ``None`` or a
        value below 1 is replaced by ``DEFAULT_BATCH_SIZE``.
    '''
    if batch_size is None or batch_size < 1:
        batch_size = DEFAULT_BATCH_SIZE

    spotinst_account = elastigroup_api.get_spotinst_account_data(region, stack_name)

    info('Redeploying the cluster for ElastiGroup {} (ID {})'.format(stack_name, elastigroup_id))

    deploy_output = elastigroup_api.deploy(batch_size=batch_size, grace_period=600,
                                           elastigroup_id=elastigroup_id,
                                           spotinst_account_data=spotinst_account)

    deploy_count = len(deploy_output)
    all_ids = {deploy['id'] for deploy in deploy_output}
    # IDs of deploys already seen as finished. Tracking IDs (instead of a
    # running counter) makes sure a deploy is counted exactly once, no matter
    # how many polling rounds report it as finished.
    finished_ids = set()

    with Action('Waiting for deploy to complete. Total of {} deploys'.format(deploy_count)) as act:
        while True:
            for deploy in deploy_output:
                deploy_id = deploy['id']
                if deploy_id in finished_ids:
                    # already reported, no need to query it again
                    continue
                deploy_status = elastigroup_api.deploy_status(deploy_id, elastigroup_id, spotinst_account)
                for ds in deploy_status:
                    if ds['id'] != deploy_id:
                        continue
                    if ds['progress']['value'] >= 100 \
                            or ds['status'].lower() in ELASTIGROUP_TERMINATED_DEPLOY_STATUS:
                        finished_ids.add(deploy_id)
                        info('Deploy {} finished with status {}'.format(ds['id'], ds['status']))
                        break

            if finished_ids == all_ids:
                break
            time.sleep(2)
            act.progress()
def update_security_group(region_name: str, security_group: str, trusted_addresses: set):
    '''
    Restrict matching Security Groups to the trusted networks on port 443.

    For every Security Group in the region whose name contains
    ``security_group``, grants that are not part of the (possibly
    consolidated) network list are revoked, then TCP/443 ingress is
    authorized for every network in the list.

    :param region_name: region to connect to.
    :param security_group: substring matched against Security Group names.
    :param trusted_addresses: networks that should remain authorized.
    :raises boto.exception.EC2ResponseError: if authorizing fails for any
        reason other than the rule already existing.
    '''
    networks = trusted_addresses
    prefixlen = 31
    # FIXME: the usable network count depends on the existing entries and the port count!
    # Merge networks into ever larger prefixes until at most 50 remain.
    while len(networks) > 50:
        networks = consolidate_networks(networks, prefixlen)
        prefixlen -= 1
    info("{}/{} Prefixlen: {}, {} networks: {}".format(region_name, security_group, prefixlen,
                                                       len(networks), networks))

    conn = boto.ec2.connect_to_region(region_name)
    for sg in conn.get_all_security_groups():
        if security_group not in sg.name:
            continue

        # Iterate over a snapshot: a successful revoke may drop an emptied
        # rule from sg.rules, and mutating the list being iterated would make
        # the loop skip rules.
        for rule in list(sg.rules):
            info("Entrys from {}: {} {} {} {}".format(sg.name, rule.ip_protocol, rule.from_port,
                                                      rule.to_port, rule.grants))
            # NOTE(review): membership below compares IPNetwork objects with the
            # entries of `networks`; confirm both sides use the same type.
            ipgrants = [IPNetwork("{}".format(grant)) for grant in rule.grants]
            for grant in ipgrants:
                if grant not in networks:
                    warning("Remove {} from security group {}".format(grant, sg.name))
                    sg.revoke(ip_protocol=rule.ip_protocol, from_port=rule.from_port,
                              to_port=rule.to_port, cidr_ip=grant)

        with Action("Updating security group {}..".format(sg.name)) as act:
            for cidr in sorted(networks):
                try:
                    sg.authorize(ip_protocol="tcp", from_port=443, to_port=443, cidr_ip=cidr)
                except boto.exception.EC2ResponseError as e:
                    # an already existing rule is fine, anything else is not
                    if "already exists" not in e.message:
                        raise
                act.progress()
def run_linter(spec_file, verbose: bool = False):
    '''
    Lint an OpenAPI spec and return the sorted list of issues.

    The spec is loaded with ``yaml.safe_load``, passed through
    ``compatibility_layer`` and validated. If validation fails, a single
    issue describing the validation error is returned. Otherwise every
    module-level object whose name starts with ``lint_`` is called with
    ``(spec, resolver)``; each yielded item is either a location or a
    ``(location, message)`` tuple, and the rule's docstring is used as the
    guideline of the resulting issue.

    :param spec_file: YAML source accepted by ``yaml.safe_load``.
    :param verbose: print progress information while linting.
    '''
    spec = compatibility_layer(yaml.safe_load(spec_file))

    if verbose:
        info('Validating OpenAPI spec..')

    try:
        resolver = validate_spec(spec)
    except Exception as e:
        validation_issue = Issue(
            location='',
            message='Error during Swagger schema validation:\n{}'.format(e),
            guideline='Must: Provide API Reference Definition using OpenAPI')
        return [validation_issue]

    # collect all "rules" defined as functions starting with "lint_"
    lint_rules = [obj for key, obj in globals().items() if key.startswith('lint_')]

    found = []
    for rule in lint_rules:
        if verbose:
            info('Linting {}..'.format(rule.__name__.split('_', 1)[-1]))
        for finding in rule(spec, resolver):
            # a bare finding is just a location without a message
            location, message = finding if isinstance(finding, tuple) else (finding, None)
            found.append(Issue(location=location, message=message or '', guideline=rule.__doc__))

    return sorted(found)
def create(stack_name, version, dry_run, instance_type, master_nodes, worker_nodes,
           max_worker_nodes, appdynamics_access_key):
    '''
    Create a new Kubernetes cluster (using current AWS credentials)

    With ``dry_run`` set the cluster variables are printed and neither the
    etcd cluster nor the senza stack is created; subnet tagging and user
    data generation still run.
    '''
    variables = get_cluster_variables(stack_name, version, appdynamics_access_key)

    info('Cluster name is: {}'.format(variables['webhook_cluster_name']))
    info('API server endpoint will be: {}'.format(variables['api_server']))

    if dry_run:
        print(yaml.safe_dump(variables))

    # TODO: register mint bucket with "kube-secretary" app
    etcd_missing = not has_etcd_cluster()
    if etcd_missing and not dry_run:
        deploy_etcd_cluster(variables['hosted_zone'], variables['etcd_bucket'], variables['region'])

    tag_subnets()

    userdata_master = get_user_data('userdata-master.yaml', variables)
    userdata_worker = get_user_data('userdata-worker.yaml', variables)

    if dry_run:
        return

    senza_create = [
        'senza', 'create', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(userdata_master),
        'UserDataWorker={}'.format(userdata_worker),
        'KmsKey=*',
        'MasterNodes={}'.format(master_nodes),
        'WorkerNodes={}'.format(worker_nodes),
        'MaximumWorkerNodes={}'.format(max_worker_nodes),
        'InstanceType={}'.format(instance_type),
    ]
    subprocess.check_call(senza_create)

    # wait up to 15m for stack to be created
    subprocess.check_call(['senza', 'wait', '--timeout=900', stack_name, version])

    wait_for_api_server(variables['api_server'])
def get_trusted_addresses(config: dict):
    '''
    Collect the set of trusted CIDR addresses described by the configuration.

    The result contains every network from ``global.trusted_networks`` plus a
    ``/32`` entry for each IPv4 address that the ``odd-<region>`` and
    ``nat-<az>`` host names of every configured account resolve to. Host
    names that cannot be resolved are reported as ``n/a`` and skipped.

    :param config: configuration with optional ``global`` and ``accounts``
        sections; the merged per-account configuration must provide
        ``regions`` and ``domain``.
    :return: set of CIDR strings.
    '''
    accounts = config.get('accounts', {})
    global_cfg = config.get('global', {})
    addresses = set()

    for name, cidr in global_cfg.get('trusted_networks', {}).items():
        info('Adding trusted network {} ({})'.format(name, cidr))
        addresses.add(cidr)

    for account_name, account_cfg in accounts.items():
        # account specific settings override the global ones
        cfg = dict(global_cfg)
        if account_cfg:
            cfg.update(account_cfg)

        for region in cfg['regions']:
            base_domain = cfg.get('domain').format(account_name=account_name)
            domains = {'odd-{}.{}'.format(region, base_domain)}
            for az in get_az_names(region):
                domains.add('nat-{}.{}'.format(az, base_domain))

            for domain in sorted(domains):
                with Action('Checking {}'.format(domain)) as act:
                    try:
                        ai = socket.getaddrinfo(domain, 443, family=socket.AF_INET, type=socket.SOCK_STREAM)
                    except (OSError, UnicodeError):
                        # Resolution failures (socket.gaierror is an OSError)
                        # and host names that cannot be encoded are expected
                        # for hosts that do not exist; interrupts and
                        # programming errors must not be swallowed here.
                        ai = []
                        act.error('n/a')
                    for _, _, _, _, ip_port in ai:
                        ip, _ = ip_port
                        addresses.add('{}/32'.format(ip))

    return addresses
def delete_old_services(config, application, version, release, execute):
    '''Delete old releases

    Lists all services labelled with the application and issues a delete
    change request for each one except ``<application>-<release>``. Aborts
    without deleting anything if that target service does not exist. Each
    change request is executed right away when ``execute`` is set, otherwise
    its ID is printed.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    listing = kubectl_get(namespace, 'services', '-l', 'application={}'.format(application))
    keep_name = '{}-{}'.format(application, release)

    # newest names first (reverse lexical order)
    names = sorted((svc['metadata']['name'] for svc in listing['items']), reverse=True)
    obsolete = [svc_name for svc_name in names if svc_name != keep_name]

    if keep_name not in names:
        error('Service {} was not found.'.format(keep_name))
        raise click.Abort()

    for svc_name in obsolete:
        info('Deleting service {}..'.format(svc_name))

        cluster_id = config.get('kubernetes_cluster')
        namespace = config.get('kubernetes_namespace')
        path = '/kubernetes-clusters/{}/namespaces/{}/services/{}'.format(cluster_id, namespace, svc_name)

        change_request_id = request(config, requests.delete, path).json()['id']

        if not execute:
            print(change_request_id)
        else:
            approve_and_execute(config, change_request_id)
def output(output):
    '''Example for all possible Echo Formats

    You see the message only, if the Output TEXT
    '''
    columns = ['id', 'name']
    rows = [
        {'id': 1, 'name': 'Test #1'},
        {'id': 2, 'name': 'Test #2'},
    ]

    with OutputFormat(output):
        # success, with and without a message
        action('This is a ok:')
        ok()
        action('This is a ok with message:')
        ok('all is fine')

        action('This is a warning:')
        warning('please check this')

        with Action('Start with working..') as act:
            # save_the_world()
            for _ in range(4):
                act.progress()

        print_table(columns, rows)
        info('Only FYI')

        action('This is a error:')
        error('this is wrong, please fix')

        # the demo ends with a fatal error; the final info() call exists to
        # show that nothing is printed afterwards
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        info('I\'am not printed, the process a dead')
def delete(config, type, resource, execute):
    '''Delete a Kubernetes resource or Cloud Formation stack

    For type ``kubernetes`` the resource must be given as ``KIND/NAME``;
    any other type is treated as a Cloud Formation stack name. The resulting
    change request is executed when ``execute`` is set, otherwise its ID is
    printed.
    '''
    if type != 'kubernetes':
        info('Deleting Cloud Formation stack {}..'.format(resource))

        aws_account = config.get('aws_account')
        aws_region = config.get('aws_region')
        path = '/aws-accounts/{}/regions/{}/cloudformation-stacks/{}'.format(
            aws_account, aws_region, resource)
    else:
        parts = resource.split('/')
        if len(parts) != 2:
            error('Kubernetes resource must be KIND/NAME')
            raise click.Abort()
        kind, name = parts

        info('Deleting Kubernetes {} {}..'.format(kind, name))

        cluster_id = config.get('kubernetes_cluster')
        namespace = config.get('kubernetes_namespace')
        path = '/kubernetes-clusters/{}/namespaces/{}/{}/{}'.format(
            cluster_id, namespace, kind, name)

    change_request_id = request(config, requests.delete, path).json()['id']

    if not execute:
        print(change_request_id)
    else:
        approve_and_execute(config, change_request_id)
def create(stack_name, version, dry_run):
    '''
    Create a new Kubernetes cluster (using current AWS credentials)

    With ``dry_run`` set the cluster variables are printed and neither the
    etcd cluster nor the senza stack is created; subnet tagging and user
    data generation still run.
    '''
    variables = get_cluster_variables(stack_name, version)

    info('Cluster name is: {}'.format(variables['webhook_cluster_name']))
    info('API server endpoint will be: {}'.format(variables['api_server']))

    if dry_run:
        print(yaml.safe_dump(variables))

    etcd_missing = not has_etcd_cluster()
    if etcd_missing and not dry_run:
        deploy_etcd_cluster(variables['hosted_zone'])

    tag_subnets()

    userdata_master = get_user_data('userdata-master.yaml', variables)
    userdata_worker = get_user_data('userdata-worker.yaml', variables)

    if dry_run:
        return

    senza_create = [
        'senza', 'create', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(userdata_master),
        'UserDataWorker={}'.format(userdata_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_create)

    # wait up to 15m for stack to be created
    subprocess.check_call(['senza', 'wait', '--timeout=900', stack_name, version])

    wait_for_api_server(variables['api_server'])
def update(stack_name, version, force):
    '''
    Update Kubernetes cluster

    Regenerates master and worker user data, reusing the worker shared secret
    found in the current launch configuration. Unless ``force`` is set,
    nothing happens when the decoded worker user data is unchanged.
    '''
    current_user_data = get_launch_configuration_user_data(stack_name, version)
    shared_secret = get_worker_shared_secret(current_user_data)
    variables = get_cluster_variables(stack_name, version, shared_secret)

    new_master = get_user_data('userdata-master.yaml', variables)
    new_worker = get_user_data('userdata-worker.yaml', variables)

    # TODO: handle master nodes as well
    if not force:
        unchanged = decode_user_data(current_user_data) == decode_user_data(new_worker)
        if unchanged:
            info('Worker user data did not change, not updating anything.')
            return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(new_master),
        'UserDataWorker={}'.format(new_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    subprocess.check_call(['senza', 'wait', stack_name, version])

    perform_node_updates(stack_name, version, new_worker)
def scale_deployment(config, application, version, release, replicas, execute):
    '''Scale a single deployment

    Builds a resources update that sets the replica count of deployment
    ``<application>-<version>-<release>`` and sends it as a PATCH change
    request. The change request is executed when ``execute`` is set,
    otherwise its ID is printed.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    target = '{}-{}-{}'.format(application, version, release)
    info('Scaling deployment {} to {} replicas..'.format(target, replicas))

    update_doc = ResourcesUpdate()
    update_doc.set_number_of_replicas(target, replicas)

    cluster_id = config.get('kubernetes_cluster')
    namespace = config.get('kubernetes_namespace')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(cluster_id, namespace)

    payload = update_doc.to_dict()
    change_request_id = request(config, requests.patch, path, json=payload).json()['id']

    if not execute:
        print(change_request_id)
    else:
        approve_and_execute(config, change_request_id)
def find_taupage_amis(regions: list) -> dict:
    '''
    Find latest Taupage AMI for each region

    Searches each region for available, non-public, EBS-backed images whose
    name matches ``*Taupage-AMI-*`` and picks the one whose name sorts last.

    :param regions: region names to search.
    :return: mapping of region name to the selected image object.
    :raises Exception: if a region has no matching image.
    '''
    image_filters = [
        {'Name': 'name', 'Values': ['*Taupage-AMI-*']},
        {'Name': 'is-public', 'Values': ['false']},
        {'Name': 'state', 'Values': ['available']},
        {'Name': 'root-device-type', 'Values': ['ebs']},
    ]

    latest_by_region = {}
    for region in regions:
        with Action('Finding latest Taupage AMI in {}..'.format(region)):
            ec2 = boto3.resource('ec2', region)
            candidates = list(ec2.images.filter(Filters=image_filters))
            if not candidates:
                raise Exception('No Taupage AMI found')
            # names sort chronologically, so the last one is the newest
            candidates.sort(key=lambda image: image.name)
            newest = candidates[-1]
            latest_by_region[region] = newest
            info(newest.name)
    return latest_by_region
def output(output):
    '''Example for all possible Echo Formats

    You see the message only, if the Output TEXT
    '''
    sample_rows = [{'id': number, 'name': 'Test #{}'.format(number)} for number in (1, 2)]

    with OutputFormat(output):
        action('This is a ok:')
        ok()

        action('This is a ok with message:')
        ok('all is fine')

        action('This is a warning:')
        warning('please check this')

        with Action('Start with working..') as act:
            # save_the_world()
            for _ in range(4):
                act.progress()

        print_table(['id', 'name'], sample_rows)
        info('Only FYI')

        action('This is a error:')
        error('this is wrong, please fix')

        # last step of the demo: a fatal error, followed by an info() call
        # that is there to show it never gets printed
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        info('I\'am not printed, the process a dead')
def update(stack_name, version, force):
    '''
    Update Kubernetes cluster

    Regenerates master and worker user data, reusing the worker shared secret
    from the current worker launch configuration. Unless ``force`` is set,
    nothing happens when both user data documents are unchanged. Otherwise
    the stack is updated via senza, then master nodes are updated, the API
    server is awaited and finally the worker nodes are updated.
    '''
    current_master = get_launch_configuration_user_data(stack_name, version, 'Master')
    current_worker = get_launch_configuration_user_data(stack_name, version, 'Worker')

    shared_secret = get_worker_shared_secret(current_worker)
    variables = get_cluster_variables(stack_name, version, shared_secret)

    new_master = get_user_data('userdata-master.yaml', variables)
    new_worker = get_user_data('userdata-worker.yaml', variables)

    if not force:
        unchanged = same_user_data(current_master, new_master) and same_user_data(current_worker, new_worker)
        if unchanged:
            info('Neither worker nor master user data did change, not updating anything.')
            return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(new_master),
        'UserDataWorker={}'.format(new_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    subprocess.check_call(['senza', 'wait', '--timeout=600', stack_name, version])

    # masters first; workers only once the API server is reachable again
    perform_node_updates(stack_name, version, 'Master', new_master)
    wait_for_api_server(variables['api_server'])
    perform_node_updates(stack_name, version, 'Worker', new_worker)
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file

    The definition is evaluated into a Cloud Formation template and created
    as stack ``<StackName>-<version>``. Tags declared under ``SenzaInfo.Tags``
    are evaluated and complemented with ``Name``, ``StackName`` and
    ``StackVersion``. With ``dry_run`` set the stack is not created.

    Raises ``click.UsageError`` if the stack name exceeds 128 characters, the
    operator SNS topic does not exist or the stack already exists.
    '''
    region = get_region(region)
    check_credentials(region)
    account_info = AccountArguments(region=region)
    args = parse_args(definition, region, version, parameter, account_info)

    with Action('Generating Cloud Formation template..'):
        data = evaluate(definition.copy(), args, account_info, force)
        cfjson = json.dumps(data, sort_keys=True, indent=4)

    senza_info = definition["SenzaInfo"]
    stack_name = "{0}-{1}".format(senza_info["StackName"], version)
    if len(stack_name) > 128:
        too_long = 'Stack name "{}" cannot exceed 128 characters. '.format(stack_name)
        raise click.UsageError(too_long + ' Please choose another name/version.')

    # every template parameter gets the value parsed from the command line (or None)
    parameters = [[param_name, getattr(args, param_name, None)] for param_name in data.get("Parameters", {})]

    tags = {}
    for tag_entry in senza_info.get('Tags', []):
        for tag_key, tag_value in tag_entry.items():
            # As the SenzaInfo is not evaluated, we explicitly evaluate the values here
            tags[tag_key] = evaluate_template(tag_value, info, [], args)
    tags.update({
        "Name": stack_name,
        "StackName": senza_info["StackName"],
        "StackVersion": version,
    })

    topics = None
    if "OperatorTopicId" in senza_info:
        topic = senza_info["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(data)

    cf = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if not dry_run:
                cf.create_stack(stack_name, template_body=cfjson, parameters=parameters, tags=tags,
                                notification_arns=topics, disable_rollback=disable_rollback,
                                capabilities=capabilities)
            else:
                info('**DRY-RUN** {}'.format(topics))
        except boto.exception.BotoServerError as e:
            if e.error_code != 'AlreadyExistsException':
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
def wait_for_deployment(config, application, version, release, timeout, interval):
    '''Wait for all pods to become ready

    Polls the pods labelled with the application, version and release every
    ``interval`` seconds. Returns as soon as at least one pod exists and all
    pods are in phase ``Running`` with every container reporting ready.
    Raises ``click.Abort`` once ``timeout`` seconds have passed.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    deployment_name = '{}-{}-{}'.format(application, version, release)
    deadline = time.time() + timeout

    while time.time() < deadline:
        selector = 'application={},version={},release={}'.format(application, version, release)
        pods = kubectl_get(namespace, 'pods', '-l', selector)['items']

        # a pod is ready when it is running and none of its containers is unready
        ready_count = sum(
            1 for pod in pods
            if pod['status'].get('phase') == 'Running'
            and all(container.get('ready') for container in pod['status'].get('containerStatuses', []))
        )

        if pods and ready_count >= len(pods):
            return

        info('Waiting up to {:.0f} more secs for deployment '
             '{} ({}/{} pods ready)..'.format(deadline - time.time(), deployment_name, ready_count, len(pods)))
        time.sleep(interval)

    raise click.Abort()
def update(stack_name, version, dry_run, force, instance_type, master_nodes, worker_nodes, postpone,
           max_worker_nodes, appdynamics_access_key):
    '''
    Update Kubernetes cluster

    Regenerates master and worker user data, reusing the worker shared secret
    from the current worker launch configuration. Unless ``force`` is set,
    nothing happens when both user data documents are unchanged.

    ``instance_type`` may be ``'current'`` to keep the worker launch
    configuration's instance type; ``master_nodes`` / ``worker_nodes`` may be
    ``-1`` to keep the current node counts. With ``dry_run`` set the cluster
    variables are printed and the stack is not touched. With ``postpone`` set
    the stack is updated but the node updates are skipped.
    '''
    current_master = get_launch_configuration_user_data(stack_name, version, 'Master')
    current_worker = get_launch_configuration_user_data(stack_name, version, 'Worker')

    shared_secret = get_worker_shared_secret(current_worker)
    variables = get_cluster_variables(stack_name, version, appdynamics_access_key, shared_secret)

    if dry_run:
        print(yaml.safe_dump(variables))

    new_master = get_user_data('userdata-master.yaml', variables)
    new_worker = get_user_data('userdata-worker.yaml', variables)

    if instance_type == 'current':
        worker_launch_config = get_launch_configuration(stack_name, version, 'Worker')
        instance_type = worker_launch_config['InstanceType']

    if not force:
        unchanged = same_user_data(current_master, new_master) and same_user_data(current_worker, new_worker)
        if unchanged:
            info('Neither worker nor master user data did change, not updating anything.')
            return

    # -1 means "keep whatever is currently running"
    if master_nodes == -1:
        master_nodes = get_current_master_nodes(stack_name, version)
    if worker_nodes == -1:
        worker_nodes = get_current_worker_nodes(stack_name, version)

    if dry_run:
        return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(new_master),
        'UserDataWorker={}'.format(new_worker),
        'KmsKey=*',
        'MasterNodes={}'.format(master_nodes),
        'WorkerNodes={}'.format(worker_nodes),
        'MaximumWorkerNodes={}'.format(max_worker_nodes),
        'InstanceType={}'.format(instance_type),
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    subprocess.check_call(['senza', 'wait', '--timeout=600', stack_name, version])

    if postpone:
        return

    # masters first; workers only once the API server is reachable again
    perform_node_updates(stack_name, version, 'Master', new_master, variables)
    wait_for_api_server(variables['api_server'])
    perform_node_updates(stack_name, version, 'Worker', new_worker, variables)
def setup_security_groups(use_dmz: bool, cluster_name: str, node_ips: dict, result: dict) -> dict:
    '''
    Allow traffic between regions (or within a VPC, if `use_dmz' is False)

    Creates one Security Group named after the cluster in the first VPC of
    every region and authorizes:

    * TCP 7001 from the public IP of every node in every region (only if
      ``use_dmz`` is set),
    * all traffic from members of the Security Group itself,
    * SSH from the "Odd (SSH Bastion Host)" Security Group, if it can be found.

    :param use_dmz: whether nodes talk to each other via their public IPs.
    :param cluster_name: used as Security Group name and ``Name`` tag.
    :param node_ips: mapping of region name to a list of address dicts; each
        needs a ``PublicIp`` entry when ``use_dmz`` is set.
    :param result: mapping that receives the ``create_security_group``
        response per region; it is updated in place.
    :return: the ``result`` mapping that was passed in.
    '''
    for region in node_ips:
        with Action('Configuring Security Group in {}..'.format(region)):
            ec2 = boto3.client('ec2', region)
            resp = ec2.describe_vpcs()
            # TODO: support more than one VPC..
            vpc = resp['Vpcs'][0]
            sg_name = cluster_name
            sg = ec2.create_security_group(
                GroupName=sg_name,
                VpcId=vpc['VpcId'],
                Description='Allow Cassandra nodes to talk to each other on Secure Transport port 7001')
            result[region] = sg

            ec2.create_tags(Resources=[sg['GroupId']],
                            Tags=[{'Key': 'Name', 'Value': sg_name}])

            ip_permissions = []
            if use_dmz:
                # NOTE: we need to allow ALL public IPs (from all regions)
                for ip in itertools.chain.from_iterable(node_ips.values()):
                    ip_permissions.append({
                        'IpProtocol': 'tcp',
                        'FromPort': 7001,  # port range: From-To
                        'ToPort': 7001,
                        'IpRanges': [{'CidrIp': '{}/32'.format(ip['PublicIp'])}],
                    })

            # if internal subnets are used we just allow access from
            # within the SG, which we also need in multi-region setup
            # (for the nodetool?)
            ip_permissions.append({'IpProtocol': '-1',
                                   'UserIdGroupPairs': [{'GroupId': sg['GroupId']}]})

            # if we can find the Odd security group, authorize SSH access from it
            try:
                resp = ec2.describe_security_groups(GroupNames=['Odd (SSH Bastion Host)'])
                odd_sg = resp['SecurityGroups'][0]
                ip_permissions.append({
                    'IpProtocol': 'tcp',
                    'FromPort': 22,  # port range: From-To
                    'ToPort': 22,
                    'UserIdGroupPairs': [{'GroupId': odd_sg['GroupId']}],
                })
            except ClientError:
                info("Could not find Odd bastion host in region {}, skipping Security Group rule.".format(region))

            ec2.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                 IpPermissions=ip_permissions)

    # honour the declared return type; callers relying on the in-place
    # update of `result` are unaffected
    return result
def update_security_group(file, region_name, security_group):
    '''Update a Security Group and allow access from all trusted networks, NAT instances and bastion hosts

    Loads the YAML configuration from ``file``, derives the trusted
    addresses from it, prints them sorted (one per line) and applies them to
    the Security Group.
    '''
    trusted = get_trusted_addresses(yaml.safe_load(file))
    listing = '\n'.join(sorted(trusted))
    info(listing)
    # NOTE(review): the callee has the same name as this function; presumably
    # it resolves to a helper taking (region_name, security_group, addresses)
    # that is defined or imported elsewhere - confirm it is not shadowed by
    # this definition.
    update_security_group(region_name, security_group, trusted)
def perform_implicit_flow(config: dict):
    '''
    Get a new token via the OAuth implicit flow using the local browser.

    Starts a local redirect server on the first usable port in the range
    8081-8181, opens the authorize URL in the browser and handles requests
    until the redirect server has received query parameters.

    :param config: must provide ``business_partner_id``, ``client_id`` and
        ``authorize_url``.
    :return: the query parameters collected by the redirect server.
    :raises AuthenticationFailed: if no port in the range could be bound.
    '''
    # Must match redirect URIs in client configuration (http://localhost:8081-8181)
    first_port = 8081
    last_port = first_port + 100

    httpd = None
    # range() end is exclusive: probe exactly first_port..last_port and never
    # a port outside the registered redirect URIs
    for port_number in range(first_port, last_port + 1):
        try:
            httpd = ClientRedirectServer(('127.0.0.1', port_number))
        except socket.error:
            # port is not usable, try the next one
            continue
        break

    if httpd is None:
        raise AuthenticationFailed('Failed to launch local server')

    params = {'response_type': 'token',
              'business_partner_id': config['business_partner_id'],
              'client_id': config['client_id'],
              'redirect_uri': 'http://localhost:{}'.format(port_number)}

    param_list = ['{}={}'.format(key, value) for key, value in sorted(params.items())]
    param_string = '&'.join(param_list)
    parsed_authorize_url = urlparse(config['authorize_url'])
    browser_url = urlunsplit((parsed_authorize_url.scheme, parsed_authorize_url.netloc,
                              parsed_authorize_url.path, param_string, ''))

    # Redirect stdout and stderr. In Linux, a message is outputted to stdout when opening the browser
    # (and then a message to stderr because it can't write).
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    try:
        webbrowser.open(browser_url, new=1, autoraise=True)
    finally:
        os.dup2(saved_stdout, 1)
        os.dup2(saved_stderr, 2)
        # the duplicates are no longer needed once restored; closing them
        # avoids leaking two file descriptors per call
        os.close(saved_stdout)
        os.close(saved_stderr)

    info('Your browser has been opened to visit:\n\n\t{}\n'.format(browser_url))

    while not httpd.query_params:
        # Handle first request, which will redirect to Javascript
        # Handle next request, with token
        httpd.handle_request()

    return httpd.query_params
def perform_implicit_flow(config: dict):
    '''
    Get a new token via the OAuth implicit flow using the local browser.

    Starts a local redirect server on the first usable port in the range
    8081-8181, opens the authorize URL in the browser and handles requests
    until the redirect server has received query parameters.

    :param config: must provide ``business_partner_id``, ``client_id`` and
        ``authorize_url``.
    :return: the query parameters collected by the redirect server.
    :raises AuthenticationFailed: if no port in the range could be bound.
    '''
    # Must match redirect URIs in client configuration (http://localhost:8081-8181)
    first_port = 8081
    last_port = first_port + 100

    httpd = None
    # range() end is exclusive: probe exactly first_port..last_port and never
    # a port outside the registered redirect URIs
    for port_number in range(first_port, last_port + 1):
        try:
            httpd = ClientRedirectServer(('127.0.0.1', port_number))
        except socket.error:
            # port is not usable, try the next one
            continue
        break

    if httpd is None:
        raise AuthenticationFailed('Failed to launch local server')

    params = {'response_type': 'token',
              'business_partner_id': config['business_partner_id'],
              'client_id': config['client_id'],
              'redirect_uri': 'http://localhost:{}'.format(port_number)}

    param_list = ['{}={}'.format(key, value) for key, value in sorted(params.items())]
    param_string = '&'.join(param_list)
    parsed_authorize_url = urlparse(config['authorize_url'])
    browser_url = urlunsplit((parsed_authorize_url.scheme, parsed_authorize_url.netloc,
                              parsed_authorize_url.path, param_string, ''))

    # Redirect stdout and stderr. In Linux, a message is outputted to stdout when opening the browser
    # (and then a message to stderr because it can't write).
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    try:
        webbrowser.open(browser_url, new=1, autoraise=True)
    finally:
        os.dup2(saved_stdout, 1)
        os.dup2(saved_stderr, 2)
        # the duplicates are no longer needed once restored; closing them
        # avoids leaking two file descriptors per call
        os.close(saved_stdout)
        os.close(saved_stderr)

    info('Your browser has been opened to visit:\n\n\t{}\n'.format(browser_url))

    while not httpd.query_params:
        # Handle first request, which will redirect to Javascript
        # Handle next request, with token
        httpd.handle_request()

    return httpd.query_params
def get_base_ami_id(ec2_conn, cfg: dict):
    '''
    Return the ID of the base AMI whose name sorts last.

    If the first search finds nothing, ``permit_base_image`` is called and
    the search is repeated once.

    :raises Exception: if no AMI is found even after the retry.
    '''
    candidates = search_base_ami_ids(ec2_conn, cfg)
    if not candidates:
        # nothing visible yet: request access to the base image and retry
        permit_base_image(ec2_conn, cfg)
        candidates = search_base_ami_ids(ec2_conn, cfg)
    if not candidates:
        raise Exception("No AMI found")

    *_, newest = sorted(candidates, key=lambda image: image.name)
    info('Most recent AMI is "{}" ({})'.format(newest.name, newest.id))
    return newest.id
def pick_seed_node_ips(node_ips: dict, seed_count: int) -> dict:
    '''
    Take first {seed_count} IPs in every region for the seed nodes.

    :param node_ips: mapping of region to a list of address dicts, each with
        a ``_defaultIp`` entry.
    :param seed_count: number of leading entries to pick per region.
    :return: mapping of region to the picked entries.
    '''
    chosen = {}
    for region, region_ips in node_ips.items():
        seeds = region_ips[:seed_count]
        chosen[region] = seeds
        joined = ', '.join(node['_defaultIp'] for node in seeds)
        info('Our seed nodes in {} will be: {}'.format(region, joined))
    return chosen
def pick_seed_node_ips(node_ips: dict, seed_count: int) -> dict:
    '''
    Take first {seed_count} IPs in every region for the seed nodes.

    :param node_ips: mapping of region to a list of address dicts, each with
        a ``_defaultIp`` entry.
    :param seed_count: number of leading entries to pick per region.
    :return: mapping of region to the picked entries.
    '''
    seeds_by_region = {}
    for region, candidates in node_ips.items():
        picked = candidates[:seed_count]
        seeds_by_region[region] = picked
        default_ips = [entry['_defaultIp'] for entry in picked]
        message = 'Our seed nodes in {} will be: {}'.format(region, ', '.join(default_ips))
        info(message)
    return seeds_by_region
def apply(config, template_or_directory, parameter, execute):
    '''Apply CloudFormation or Kubernetes resource

    Accepts a single template file or a directory; from a directory every
    non-hidden ``*.yaml`` entry is used. Each template is rendered with the
    parsed parameters (plus ``prev_release``) and, depending on its content,
    sent as a Kubernetes manifest (has ``kind``) or as a Cloud Formation
    template (has ``Resources``). Each change request is executed when
    ``execute`` is set, otherwise its ID is printed.

    Aborts on templates that do not render to a mapping, that are neither
    kind, or on Cloud Formation templates without ``Metadata/StackName``.
    '''
    if os.path.isdir(template_or_directory):
        template_paths = [
            os.path.join(template_or_directory, entry)
            for entry in os.listdir(template_or_directory)
            if entry.endswith('.yaml') and not entry.startswith('.')
        ]
    else:
        template_paths = [template_or_directory]

    context = parse_parameters(parameter)

    namespace = config.get('kubernetes_namespace')

    # try to find previous release of a service.
    services = kubectl_get(namespace, 'services', '-l', 'application={}'.format(context['application']))
    context["prev_release"] = get_prev_release(services['items'], context['release'])

    for template_path in template_paths:
        with open(template_path, 'r') as fd:
            document = _render_template(fd, context)

        if not isinstance(document, dict):
            error('Invalid YAML contents in {}'.format(template_path))
            raise click.Abort()

        if 'kind' in document:
            info('Applying Kubernetes manifest {}..'.format(template_path))
            cluster_id = config.get('kubernetes_cluster')
            namespace = config.get('kubernetes_namespace')
            api_path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(cluster_id, namespace)
            response = request(config, requests.post, api_path, json=document)
        elif 'Resources' in document:
            info('Applying Cloud Formation template {}..'.format(template_path))
            aws_account = config.get('aws_account')
            aws_region = config.get('aws_region')
            stack_name = document.get('Metadata', {}).get('StackName')
            if not stack_name:
                error('Cloud Formation template requires Metadata/StackName property')
                raise click.Abort()
            api_path = '/aws-accounts/{}/regions/{}/cloudformation-stacks/{}'.format(
                aws_account, aws_region, stack_name)
            response = request(config, requests.put, api_path, json=document)
        else:
            error('Neither a Kubernetes manifest nor a Cloud Formation template: {}'.format(template_path))
            raise click.Abort()

        change_request_id = response.json()['id']

        if not execute:
            print(change_request_id)
        else:
            approve_and_execute(config, change_request_id)
def respawn_auto_scaling_group(asg_name: str, region: str, inplace: bool=False):
    '''Respawn all EC2 instances in the Auto Scaling Group whose launch configuration is not up-to-date

    :param asg_name: name of the Auto Scaling Group.
    :param region: region of the Auto Scaling Group.
    :param inplace: passed through to ``do_respawn_auto_scaling_group``.
    '''
    client = boto3.client('autoscaling', region)
    group = get_auto_scaling_group(client, asg_name)
    wanted_launch_config = group['LaunchConfigurationName']
    outdated, up_to_date = get_instances_to_terminate(group, wanted_launch_config)

    total = len(outdated) + len(up_to_date)
    info('{}/{} instances need to be updated in {}'.format(len(outdated), total, asg_name))

    if not outdated:
        info('Nothing to do')
        return
    do_respawn_auto_scaling_group(asg_name, group, region, outdated, up_to_date, inplace)
def switch_deployment(config, application, version, release, ratio, execute):
    '''Switch to new release

    ``ratio`` has the form ``TARGET/TOTAL``: the deployment
    ``<application>-<version>-<release>`` is scaled to ``TARGET`` replicas,
    the first other deployment (in reverse name order) receives the remaining
    ``TOTAL - TARGET`` replicas and all further deployments are scaled to 0.
    The change request is executed when ``execute`` is set, otherwise its ID
    is printed.

    Exits with status 1 (``SystemExit``) if the target deployment does not
    exist.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    target_replicas, total = ratio.split('/')
    target_replicas = int(target_replicas)
    total = int(total)

    data = kubectl_get(namespace, 'deployments', '-l', 'application={}'.format(application))
    deployments = data['items']
    target_deployment_name = '{}-{}-{}'.format(application, version, release)

    target_deployment_exists = any(
        deployment['metadata']['name'] == target_deployment_name for deployment in deployments)

    if not target_deployment_exists:
        error("Deployment {} does not exist!".format(target_deployment_name))
        # The bare exit() helper is injected by the `site` module for
        # interactive use and is not guaranteed to exist; raising SystemExit
        # directly has the same effect for callers.
        raise SystemExit(1)

    resources_update = ResourcesUpdate()
    remaining_replicas = total - target_replicas
    for deployment in sorted(deployments, key=lambda d: d['metadata']['name'], reverse=True):
        deployment_name = deployment['metadata']['name']
        if deployment_name == target_deployment_name:
            replicas = target_replicas
        else:
            # maybe spread across all other deployments?
            replicas = remaining_replicas
            remaining_replicas = 0

        info('Scaling deployment {} to {} replicas..'.format(deployment_name, replicas))
        resources_update.set_number_of_replicas(deployment_name, replicas)

    cluster_id = config.get('kubernetes_cluster')
    namespace = config.get('kubernetes_namespace')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(cluster_id, namespace)
    response = request(config, requests.patch, path, json=resources_update.to_dict())
    change_request_id = response.json()['id']

    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file

    The definition is evaluated into a Cloud Formation template and created
    as stack ``<StackName>-<version>``, tagged with ``Name``, ``StackName``
    and ``StackVersion``. With ``dry_run`` set the stack is not created.

    Raises ``click.UsageError`` if the stack name exceeds 128 characters, the
    operator SNS topic does not exist or the stack already exists.
    '''
    region = get_region(region)
    check_credentials(region)
    args = parse_args(definition, region, version, parameter)

    with Action('Generating Cloud Formation template..'):
        data = evaluate(definition.copy(), args, force)
        cfjson = json.dumps(data, sort_keys=True, indent=4)

    senza_info = definition["SenzaInfo"]
    stack_name = "{0}-{1}".format(senza_info["StackName"], version)
    if len(stack_name) > 128:
        too_long = 'Stack name "{}" cannot exceed 128 characters. '.format(stack_name)
        raise click.UsageError(too_long + ' Please choose another name/version.')

    # every template parameter gets the value parsed from the command line (or None)
    parameters = [[param_name, getattr(args, param_name, None)] for param_name in data.get("Parameters", {})]

    tags = {
        "Name": stack_name,
        "StackName": senza_info["StackName"],
        "StackVersion": version,
    }

    topics = None
    if "OperatorTopicId" in senza_info:
        topic = senza_info["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(data)

    cf = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if not dry_run:
                cf.create_stack(stack_name, template_body=cfjson, parameters=parameters, tags=tags,
                                notification_arns=topics, disable_rollback=disable_rollback,
                                capabilities=capabilities)
            else:
                info('**DRY-RUN** {}'.format(topics))
        except boto.exception.BotoServerError as e:
            if e.error_code != 'AlreadyExistsException':
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
def main():
    '''
    Keep /etc/hosts in sync with the CNAME-like mappings from /etc/cnames.

    The current hosts file is backed up first. Then, every 60 seconds, each
    ``FROM=TO`` line of /etc/cnames is resolved and the hosts file is
    rewritten as its original content followed by one ``IP FROM`` entry per
    resolved IPv4/IPv6 address. Blank lines and lines starting with ``#`` in
    /etc/cnames are ignored. When the loop ends (Ctrl-C or an error) the
    backup is moved back over the hosts file.
    '''
    hosts_file = Path('/etc/hosts')
    cname_file = Path('/etc/cnames')

    with hosts_file.open() as fd:
        old_contents = fd.read()

    backup_file = hosts_file.with_suffix('.local-cname-backup')
    with backup_file.open('w') as fd:
        fd.write(old_contents)

    try:
        while True:
            entries = []
            with cname_file.open() as fd:
                for line in fd:
                    mapping = line.strip()
                    if not mapping or mapping.startswith('#'):
                        # an empty (e.g. trailing) or comment line is not a
                        # mapping and must not abort the whole run
                        continue
                    cname_from, cname_to = mapping.split('=')
                    print('resoving:' + cname_to)
                    with Action('Resolving {} ..'.format(cname_to)):
                        results = socket.getaddrinfo(cname_to, 80, type=socket.SOCK_STREAM)
                        for family, _socktype, _proto, _canonname, sockaddr in results:
                            if family in (socket.AF_INET, socket.AF_INET6):
                                entries.append((cname_from, sockaddr[0]))

            info('Current entries:')
            for hostname, ip in entries:
                info('{} -> {}'.format(hostname, ip))

            with Action('Writing {} ..'.format(hosts_file)):
                with hosts_file.open('w') as fd:
                    fd.write(old_contents)
                    fd.write('#### Start of entries generated by local-cnames\n')
                    for hostname, ip in entries:
                        fd.write('{} {}\n'.format(ip, hostname))

            time.sleep(60)
    except KeyboardInterrupt:
        # ignore, do not print stacktrace
        pass
    finally:
        # always restore the original hosts file
        backup_file.rename(hosts_file)
def get_named_token(scope, realm, name, user, password, url=None,
                    insecure=False, refresh=False, use_keyring=True, prompt=False):
    '''get named access token, return existing if still valid

    :param scope: passed through to ``get_new_token``
    :param realm: passed through to ``get_new_token``
    :param name: token name; used to look up and to store the token (may be falsy)
    :param user: user name for the token request and the keyring entry
    :param password: password; falls back to the keyring entry when falsy
    :param url: token service URL; falls back to the stored configuration
    :param insecure: when true, TLS certificates are not verified
    :param refresh: when true, an existing named token is not reused
    :param use_keyring: when true, the password is saved to the keyring on success
    :param prompt: when true, ask interactively for a missing URL/password and
        retry after an authentication failure
    :return: the result of ``get_new_token`` (or the existing token)
    :raises AuthenticationFailed: when authentication fails and ``prompt`` is false
    '''
    if name and not refresh:
        existing_token = get_existing_token(name)
        if existing_token:
            return existing_token

    config = get_config()
    url = url or config.get('url')

    while not url and prompt:
        url = click.prompt('Please enter the OAuth access token service URL')
        if not url.startswith('http'):
            url = 'https://{}'.format(url)

        try:
            requests.get(url, timeout=5, verify=not insecure)
        except Exception:
            # best-effort reachability probe: any failure means "ask again",
            # but Ctrl-C / SystemExit must still get through
            error('Could not reach {}'.format(url))
            url = None

        config['url'] = url

    stups_cli.config.store_config(config, 'zign')

    password = password or keyring.get_password(KEYRING_KEY, user)

    while True:
        if not password and prompt:
            password = click.prompt('Password for {}'.format(user), hide_input=True)

        try:
            result = get_new_token(realm, scope, user, password, url=url, insecure=insecure)
            break
        except AuthenticationFailed as e:
            if prompt:
                error(e)
                info('Please check your username and password and try again.')
                # forget the rejected password so the next round prompts again
                password = None
            else:
                raise

    if result and use_keyring:
        keyring.set_password(KEYRING_KEY, user, password)

    if name:
        store_token(name, result)

    return result
def main():
    """Let the user choose a mai account, then log in to pierone and to AWS."""
    available_accounts = mai.get_accounts()
    account = choice('Select account to login', available_accounts)  # type: str

    with Action("Login to pierone..") as pierone_action:
        pierone_ok = pierone.login()
        if not pierone_ok:
            pierone_action.fatal_error('Failed')

    with Action("Login to AWS..") as aws_action:
        aws_ok = mai.login(account)
        if not aws_ok:
            aws_action.fatal_error('Failed')

    info('Logged in to {}'.format(account))
def print_help():
    """Print the Zalando Kubectl version banner followed by the wrapper command overview."""
    click.secho('Zalando Kubectl {}\n'.format(zalando_kubectl.__version__), bold=True)
    info('''Available wrapper commands:
  zkubectl help                                Show this help message and exit
  zkubectl configure --cluster-registry=URL    Set the Cluster Registry URL
  zkubectl list                                Shortcut for "list-clusters"
  zkubectl list-clusters                       List all Kubernetes cluster in "ready" state
  zkubectl login CLUSTER_ALIAS_ID_OR_URL       Login to a specific cluster
  zkubectl dashboard                           Open the Kubernetes dashboard UI in the browser

All other commands are forwarded to kubectl:
''')
def print_success_message(options: dict):
    """Report successful cluster creation and write follow-up instructions to stdout.

    The text is formatted with every key of ``options`` plus the derived
    ``regions_list``, ``dc_list`` and a freshly generated ``random_pw``.
    """
    info('Cluster initialization completed successfully!')
    regions_list = ' '.join(options['regions'])
    # prepare alter keyspace params in the format: 'eu-central': N [, ...]
    # (a trailing "-<digits>" is stripped from each region name)
    dc_list = ', '.join([
        "'{}': {}".format(re.sub('-[0-9]+$', '', r), options['cluster_size'])
        for r in options['regions']
    ])
    sys.stdout.write('''
The Cassandra cluster {cluster_name} was created with {cluster_size} nodes
in each of the following AWS regions: {regions_list}

You might need to update the Security Group named {cluster_name}
(in all regions!) to allow access to Cassandra from the Odd host (port 22),
from your application (port 9042) and optionally to allow access to Jolokia
(port 8778) and/or Prometheus Node Exporter (port 9100) from your monitoring
tool.

You should now login to any of the cluster nodes to change the replication
settings of system_auth keyspace and to create the admin superuser, using the
following commands:

$ docker exec -ti taupageapp bash

(docker)$ cqlsh -u cassandra -p cassandra \\
  -e "ALTER KEYSPACE system_auth WITH replication = {{
        'class': 'NetworkTopologyStrategy', {dc_list}
      }};
      CREATE USER admin WITH PASSWORD '$ADMIN_PASSWORD' SUPERUSER;"

Then login with the newly created admin account and disable the default
superuser account:

(docker)$ cqlsh -u admin -p $ADMIN_PASSWORD

cqlsh> ALTER USER cassandra WITH PASSWORD '{random_pw}' NOSUPERUSER;

You can then also create non-superuser application roles and data keyspace(s).

In general, follow the documentation on setting up authentication, depending
on your Cassandra version:

  http://docs.datastax.com/en/cassandra/3.0/cassandra/configuration/secureConfigNativeAuth.html
  http://docs.datastax.com/en/cassandra/2.1/cassandra/security/security_config_native_authenticate_t.html
'''.format(**options, regions_list=regions_list, dc_list=dc_list,
           random_pw=generate_password()))
def update(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Update an existing Cloud Formation stack from the given Senza definition file'''
    stack_args = create_cf_template(definition, region, version, parameter, force)
    client = boto3.client('cloudformation', region)

    title = 'Updating Cloud Formation stack {}..'.format(stack_args['StackName'])
    with Action(title) as act:
        try:
            if not dry_run:
                # the update call is made without the 'Tags' entry
                del stack_args['Tags']
                client.update_stack(**stack_args)
            else:
                info('**DRY-RUN** {}'.format(stack_args['NotificationARNs']))
        except ClientError as exc:
            act.fatal_error('ClientError: {}'.format(pformat(exc.response)))
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file'''
    region = get_region(region)
    args = parse_args(definition, region, version, parameter)

    with Action('Generating Cloud Formation template..'):
        data = evaluate(definition.copy(), args, force)
        cfjson = json.dumps(data, sort_keys=True, indent=4)

    info_section = definition["SenzaInfo"]
    stack_name = "{0}-{1}".format(info_section["StackName"], version)

    # one [name, value] pair per template parameter; value is None when args lacks it
    parameters = []
    for param_name in data.get("Parameters", {}):
        parameters.append([param_name, getattr(args, param_name, None)])

    tags = dict(Name=stack_name, StackName=info_section["StackName"], StackVersion=version)

    if "OperatorTopicId" not in info_section:
        topics = None
    else:
        topic = info_section["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(data)
    connection = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if dry_run:
                info('**DRY-RUN** {}'.format(topics))
            else:
                connection.create_stack(stack_name, template_body=cfjson, parameters=parameters, tags=tags,
                                        notification_arns=topics, disable_rollback=disable_rollback,
                                        capabilities=capabilities)
        except boto.exception.BotoServerError as err:
            already_exists = err.error_code == 'AlreadyExistsException'
            if not already_exists:
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
def launch_normal_nodes(options: dict):
    """Launch the non-seed instances of every region, one minute apart.

    For each region in ``options['node_ips']`` the addresses at index
    ``options['seed_count']`` and above are launched with ``is_seed=False``;
    subnets are assigned round-robin by index.
    """
    # TODO: parallelize by region?
    for region, ips in options['node_ips'].items():
        region_subnets = options['subnets'][region]
        for index, ip in enumerate(ips):
            if index < options['seed_count']:
                # the leading seed_count addresses are not launched here
                continue
            # avoid starting all nodes at the same time
            info("Sleeping for one minute before launching next node..")
            time.sleep(60)
            launch_kwargs = dict(
                ami=options['taupage_amis'][region],
                subnet_id=region_subnets[index % len(region_subnets)]['SubnetId'],
                security_group_id=options['security_groups'][region]['GroupId'],
                is_seed=False,
                options=options,
            )
            launch_instance(region, ip, **launch_kwargs)
def _scale_deployment(config, name, namespace, replicas, execute):
    '''Scale a single deployment'''
    info('Scaling deployment {} to {} replicas..'.format(name, replicas))

    update = ResourcesUpdate()
    update.set_number_of_replicas(name, replicas)

    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(
        config.get('kubernetes_cluster'), namespace)
    change_request_id = request(config, requests.patch, path, json=update.to_dict()).json()['id']

    if not execute:
        # without execute the change request ID is only printed
        print(change_request_id)
        return
    approve_and_execute(config, change_request_id)
def _open_dashboard_in_browser():
    """Wait briefly for the local kubectl proxy, then open the dashboard URL in a browser.

    The URL is probed up to 20 times, 0.1s apart; the browser is opened
    afterwards whether or not a probe succeeded.
    """
    import webbrowser
    # sleep some time to make sure "kubectl proxy" runs
    url = 'http://localhost:8001/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy'
    with Action('Waiting for local kubectl proxy..') as act:
        for _ in range(20):
            time.sleep(0.1)
            try:
                requests.get(url, timeout=2)
            except requests.RequestException:
                # proxy not answering yet; keep waiting (Ctrl-C still interrupts)
                act.progress()
            else:
                break
    info('\nOpening {} ..'.format(url))
    webbrowser.open(url)
def test_echo():
    """Exercise the console helpers: each started action is closed by a different outcome."""
    action('Action..')
    ok()

    action('Action..')
    error(' some error')

    action('Action..')
    # fatal_error is expected to terminate via SystemExit
    with pytest.raises(SystemExit):
        fatal_error(' some fatal error')  # noqa

    action('Action..')
    warning(' some warning')

    info('Some info')
def launch_seed_nodes(options: dict):
    """Launch the seed instances of every region, pausing a minute between launches.

    No pause follows the launch that brings the count up to
    ``seed_count * len(regions)``.
    """
    expected_total = options['seed_count'] * len(options['regions'])
    launched = 0
    for region, ips in options['seed_nodes'].items():
        region_subnets = options['subnets'][region]
        for index, ip in enumerate(ips):
            launch_kwargs = dict(
                ami=options['taupage_amis'][region],
                subnet_id=region_subnets[index % len(region_subnets)]['SubnetId'],
                security_group_id=options['security_groups'][region]['GroupId'],
                is_seed=True,
                options=options,
            )
            launch_instance(region, ip, **launch_kwargs)
            launched += 1
            if launched >= expected_total:
                continue
            info("Sleeping for a minute before launching next SEED node..")
            time.sleep(60)
def status(config):
    """Check system status"""
    data = get('/status').json()

    def by_name(entry):
        return entry.get('name')

    info('Workers:')
    workers = sorted(data.get('workers', []), key=by_name)
    print_table(['name', 'check_invocations', 'last_execution_time'], workers)

    info('Queues:')
    queues = sorted(data.get('queues', []), key=by_name)
    print_table(['name', 'size'], queues)
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file'''
    stack_args = create_cf_template(definition, region, version, parameter, force)
    client = boto3.client('cloudformation', region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_args['StackName'])) as act:
        try:
            if not dry_run:
                client.create_stack(DisableRollback=disable_rollback, **stack_args)
            else:
                info('**DRY-RUN** {}'.format(stack_args['NotificationARNs']))
        except ClientError as exc:
            error_code = exc.response['Error']['Code']
            if error_code != 'AlreadyExistsException':
                raise
            act.fatal_error('Stack {} already exists. Please choose another version.'.format(
                stack_args['StackName']))
def update(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Update an existing Cloud Formation stack from the given Senza definition file'''
    template = create_cf_template(definition, region, version, parameter, force)
    cloudformation = boto3.client('cloudformation', region)

    with Action('Updating Cloud Formation stack {}..'.format(template['StackName'])) as act:
        try:
            if dry_run:
                info('**DRY-RUN** {}'.format(template['NotificationARNs']))
            else:
                # 'Tags' is removed before the remaining entries are passed on
                template.pop('Tags')
                cloudformation.update_stack(**template)
        except ClientError as client_error:
            details = pformat(client_error.response)
            act.fatal_error('ClientError: {}'.format(details))
def delete_deployment(config, deployment, execute):
    '''Delete deployment by first scaling down to 0, deleting the deployment resource
    and any replicaset resources owned by the deployment.

    :param config: configuration mapping; ``kubernetes_cluster`` is read from it
    :param deployment: deployment object with ``metadata.name`` and ``metadata.namespace``
    :param execute: when true, each change request is approved and executed;
        otherwise its ID is printed

    Returns early (after logging an error) when the deployment does not reach
    zero replicas within ``DEFAULT_RESOURCE_DELETION_TIMEOUT`` seconds.
    '''
    cluster_id = config.get('kubernetes_cluster')
    name = deployment['metadata']['name']
    namespace = deployment['metadata']['namespace']

    # scale deployment to 0 before deleting
    _scale_deployment(config, name, namespace, 0, execute)

    # wait for deployment to be scaled down to 0
    timeout = DEFAULT_RESOURCE_DELETION_TIMEOUT
    maxtime = time.time() + timeout
    while get_replicas(name, namespace) > 0:
        if time.time() > maxtime:
            error('Timed out after {:d}s waiting for deployment to scale down'.format(timeout))
            return
        # pause between polls instead of querying in a tight loop
        time.sleep(1)

    # get replicasets owned by the deployment
    replicasets = kubectl_get(namespace, 'replicasets')
    owned_rs = get_owned_replicasets(deployment, replicasets['items'])

    # delete deployment
    info('Deleting deployment {}..'.format(name))
    path = '/kubernetes-clusters/{}/namespaces/{}/deployments/{}'.format(
        cluster_id, namespace, name)
    response = request(config, requests.delete, path)
    change_request_id = response.json()['id']

    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)

    # delete replicasets
    for rs in owned_rs:
        rs_name = rs['metadata']['name']
        info('Deleting replicaset {}..'.format(rs_name))
        path = '/kubernetes-clusters/{}/namespaces/{}/replicasets/{}'.format(
            cluster_id, namespace, rs_name)
        response = request(config, requests.delete, path)
        change_request_id = response.json()['id']

        if execute:
            approve_and_execute(config, change_request_id)
        else:
            print(change_request_id)
def create(definition: str, image_version: str, keep_stacks: int, traffic: int,
           verbose: bool, senza_parameters: list, app_version: Optional[str],
           stack_version: Optional[str], disable_rollback: bool):
    """Request a new stack from Lizzy and wait until its deployment has finished.

    Calls ``fatal_error`` when the request fails or when the last reported
    state is anything other than ``CF:CREATE_COMPLETE``.
    """
    if not senza_parameters:
        senza_parameters = []

    config = Configuration()
    access_token = fetch_token(config.token_url, config.scopes, config.credentials_dir)
    lizzy = Lizzy(config.lizzy_url, access_token)

    with Action('Requesting new stack..') as request_action:
        try:
            stack_id = lizzy.new_stack(image_version, keep_stacks, traffic, definition,
                                       stack_version, app_version, disable_rollback,
                                       senza_parameters)
        except requests.RequestException as e:
            request_action.fatal_error('Deployment failed: {}.'.format(e))

    info('Stack ID: {}'.format(stack_id))

    with Action('Waiting for new stack...') as wait_action:
        if verbose:
            print()  # ensure that new states will not be printed on the same line as the action

        final_state = None
        for state in lizzy.wait_for_deployment(stack_id):
            state_changed = state != final_state
            if state_changed and verbose:
                click.echo(' {}'.format(state))
            else:
                wait_action.progress()
            final_state = state

        # states with a dedicated explanation; everything else except success is generic
        failure_messages = {
            'CF:ROLLBACK_COMPLETE':
                'Stack was rollback after deployment. Check you application log for possible reasons.',
            'LIZZY:REMOVED':
                'Stack was removed before deployment finished.',
        }
        if final_state in failure_messages:
            fatal_error(failure_messages[final_state])
        elif final_state != 'CF:CREATE_COMPLETE':
            fatal_error('Deployment failed: {}'.format(final_state))

    info('Deployment Successful')
def validate_artifact_version(options: dict) -> dict:
    """Resolve the docker image and its version from ``options``.

    With an explicit ``docker_image`` the version is the text after the last
    ``:``; combining it with ``artifact_name`` raises ``click.UsageError``.
    Without one, the image is built from ``artifact_name`` (defaulting to
    ``planb-cassandra-3.0``, which is also written back into ``options``) and
    the latest published version.

    Returns a copy of ``options`` with ``docker_image`` and ``image_version`` set.
    """
    conflict_options_msg = """Conflicting options: --artifact-name and --docker-image cannot be specified at the same time"""

    explicit_image = options['docker_image']
    if explicit_image:
        if options['artifact_name']:
            raise click.UsageError(conflict_options_msg)
        version = explicit_image.split(':')[-1]
        return dict(options, docker_image=explicit_image, image_version=version)

    if not options['artifact_name']:
        options['artifact_name'] = 'planb-cassandra-3.0'
    artifact = options['artifact_name']
    version = get_latest_docker_image_version(artifact)
    resolved_image = 'registry.opensource.zalan.do/stups/{}:{}'.format(artifact, version)
    info('Using docker image: {}'.format(resolved_image))
    return dict(options, docker_image=resolved_image, image_version=version)
def saml_login(user, url):
    """Authenticate against ``url``, prompting for the password until it succeeds.

    The password is first taken from the keyring and is written back to it
    after a successful login. Returns the ``(saml_xml, roles)`` pair.
    """
    ring_user = '******'.format(user, url)
    saml_password = keyring.get_password('mai', ring_user)

    saml_xml = None
    while not saml_xml:
        saml_password = saml_password or click.prompt('Please enter your SAML password', hide_input=True)

        with Action('Authenticating against {url}..', url=url) as act:
            try:
                auth_result = authenticate(url, user, saml_password)
            except aws_saml_login.saml.AuthenticationFailed:
                act.error('Authentication Failed')
                info('Please check your username/password and try again.')
                # drop the rejected password so the next round prompts again
                saml_password = None
            else:
                saml_xml, roles = auth_result

    keyring.set_password('mai', ring_user, saml_password)
    return saml_xml, roles
def configure_dns(account_name, cfg):
    """Ensure the account's hosted zone exists, delegate it and set its SOA TTL.

    :param account_name: substituted into the ``domain`` template from ``cfg``
        and passed to ``configure_dns_delegation``
    :param cfg: configuration mapping; reads ``domain`` and the optional
        ``domain_soa_ttl`` (default ``"60"``)
    :return: the resolved DNS domain name
    """
    dns_domain = cfg.get("domain").format(account_name=account_name)

    # NOTE: no region is passed as Route53 is region-independent
    conn = boto3.client("route53")
    # NOTE(review): list_hosted_zones_by_name is documented to start listing at
    # DNSName rather than filter on it; confirm the first entry is always the wanted zone
    zone = conn.list_hosted_zones_by_name(DNSName=dns_domain + ".")["HostedZones"]
    if not zone:
        with Action("Creating hosted zone.."):
            conn.create_hosted_zone(
                Name=dns_domain + ".",
                CallerReference="sevenseconds-" + dns_domain,
                HostedZoneConfig={"Comment": "Public Hosted Zone"},
            )
    zone = conn.list_hosted_zones_by_name(DNSName=dns_domain + ".")["HostedZones"][0]
    nameservers = conn.get_hosted_zone(Id=zone["Id"])["DelegationSet"]["NameServers"]
    info("Hosted zone for {} has nameservers {}".format(dns_domain, nameservers))

    with Action("Set up DNS Delegation..") as act:
        try:
            configure_dns_delegation(account_name, nameservers, cfg)
        except Exception:
            # delegation is best-effort, but Ctrl-C / SystemExit must not be swallowed
            act.error("DNS Delegation not possible")

    soa_ttl = cfg.get("domain_soa_ttl", "60")
    with Action("Set SOA-TTL to {}..".format(soa_ttl)):
        rr_list = conn.list_resource_record_sets(
            HostedZoneId=zone["Id"], StartRecordType="SOA", StartRecordName=zone["Name"]
        )
        # reuse the existing SOA record values; only the TTL changes
        rr = rr_list["ResourceRecordSets"][0]["ResourceRecords"]
        changebatch = {
            "Comment": "updated SOA TTL",
            "Changes": [
                {
                    "Action": "UPSERT",
                    "ResourceRecordSet": {
                        "Name": zone["Name"],
                        "Type": "SOA",
                        "TTL": int(soa_ttl),
                        "ResourceRecords": rr,
                    },
                }
            ],
        }
        conn.change_resource_record_sets(HostedZoneId=zone["Id"], ChangeBatch=changebatch)

    return dns_domain
def find_taupage_amis(regions: list) -> dict:
    ''' Find latest Taupage AMI for each region '''
    amis_by_region = {}
    for region in regions:
        with Action('Finding latest Taupage AMI in {}..'.format(region)):
            ec2 = boto3.resource('ec2', region)

            criteria = (('name', '*Taupage-AMI-*'),
                        ('is-public', 'false'),
                        ('state', 'available'),
                        ('root-device-type', 'ebs'))
            filters = [{'Name': key, 'Values': [value]} for key, value in criteria]

            candidates = list(ec2.images.filter(Filters=filters))
            if not candidates:
                raise Exception('No Taupage AMI found')

            # "latest" is the image whose name sorts last
            candidates.sort(key=lambda image: image.name)
            latest = candidates[-1]
            amis_by_region[region] = latest
            info(latest.name)
    return amis_by_region
def create(definition: str, image_version: str, keep_stacks: int, traffic: int,
           verbose: bool, senza_parameters: list, app_version: Optional[str],
           stack_version: Optional[str], disable_rollback: bool):
    """Ask Lizzy for a new stack, then follow the deployment until it ends.

    ``fatal_error`` is called if the request fails or the deployment ends in
    any state other than ``CF:CREATE_COMPLETE``.
    """
    senza_parameters = senza_parameters if senza_parameters else []

    config = Configuration()
    token = fetch_token(config.token_url, config.scopes, config.credentials_dir)
    client = Lizzy(config.lizzy_url, token)

    with Action('Requesting new stack..') as action:
        try:
            stack_id = client.new_stack(image_version, keep_stacks, traffic, definition,
                                        stack_version, app_version, disable_rollback,
                                        senza_parameters)
        except requests.RequestException as exc:
            action.fatal_error('Deployment failed: {}.'.format(exc))

    info('Stack ID: {}'.format(stack_id))

    with Action('Waiting for new stack...') as action:
        if verbose:
            print()  # ensure that new states will not be printed on the same line as the action

        previous = None
        for current in client.wait_for_deployment(stack_id):
            if current != previous and verbose:
                click.echo(' {}'.format(current))
            else:
                action.progress()
            previous = current

        if previous != 'CF:CREATE_COMPLETE':
            if previous == 'CF:ROLLBACK_COMPLETE':
                fatal_error('Stack was rollback after deployment. Check you application log for possible reasons.')
            elif previous == 'LIZZY:REMOVED':
                fatal_error('Stack was removed before deployment finished.')
            else:
                fatal_error('Deployment failed: {}'.format(previous))

    info('Deployment Successful')
def print_success_message(options: dict):
    """Report successful cluster creation and write follow-up instructions to stdout.

    The text is formatted with every key of ``options`` plus ``regions_list``
    (the regions joined by spaces) and ``admin_password`` (read from
    ``options['user_data']['environment']['ADMIN_PASSWORD']``).
    """
    info('Cluster initialization completed successfully!')
    sys.stdout.write('''
The Cassandra cluster {cluster_name} was created with {cluster_size} nodes
in each of the following AWS regions: {regions_list}

You can now login to any of the cluster nodes with the superuser
account using the following command:

$ cqlsh -u cassandra -p '{admin_password}'

From there you can create non-superuser roles and otherwise configure
the cluster.

You might also need to update the Security Groups named {cluster_name}
(in all regions!) to allow access to Cassandra from your application (port 9042)
and optionally to allow access to Jolokia (port 8778) and/or
Prometheus Node Exporter (port 9100) from your monitoring tool.
'''.format(**options, regions_list=' '.join(options['regions']),
           admin_password=options['user_data']['environment']['ADMIN_PASSWORD']))
def update_stack_from_template(region: str, template: dict, dry_run: bool):
    """
    Updates a stack from a generated template.

    The 'Tags' entry is removed from ``template`` first. A ClientError whose
    message is 'No updates are to be performed.' is reported as 'NO UPDATE';
    any other ClientError is fatal.
    """
    client = BotoClientProxy('cloudformation', region)
    del template['Tags']

    title = 'Updating Cloud Formation stack {StackName}..'.format_map(template)
    with Action(title) as act:
        try:
            if not dry_run:
                client.update_stack(**template)
            else:
                info('**DRY-RUN** {}'.format(template['NotificationARNs']))
        except ClientError as err:
            response = err.response
            if response['Error']['Message'] != 'No updates are to be performed.':
                act.fatal_error('ClientError: {}'.format(pformat(response)))
            else:
                act.ok('NO UPDATE')
def configure_cloudtrail(account_name, region, cfg, dry_run):
    """Bring the region's CloudTrail setup in line with the ``cloudtrail`` section of ``cfg``.

    Does nothing when ``cfg`` has no ``cloudtrail`` section. Otherwise the
    trail named "Default" is checked (and updated / restarted if needed), or,
    when it is missing, all listed trails are deleted and a new one is created.
    With ``dry_run`` set, no modifying call is made. ``account_name`` is not
    used in this function.
    """
    if "cloudtrail" not in cfg:
        info("Found no Cloudtrail Section in Configfile. Skipping CloudTrail configuration")
        return
    cloudtrail = boto3.client("cloudtrail", region)
    trails = cloudtrail.describe_trails()["trailList"]
    name = "Default"
    trail = find_trail(trails, name)
    # desired trail settings, also used as the arguments for create/update
    kwargs = dict(
        Name=name,
        S3BucketName=cfg["cloudtrail"]["s3_bucket_name"],
        S3KeyPrefix=cfg["cloudtrail"]["s3_key_prefix"],
        IncludeGlobalServiceEvents=True,
    )
    if trail:
        with Action("[{}] Check CloudTrail..".format(region)) as act:
            if not dry_run:
                # NOTE(review): direct indexing assumes the trail always carries these keys
                if (
                    trail["IncludeGlobalServiceEvents"] != kwargs["IncludeGlobalServiceEvents"]
                    or trail["S3KeyPrefix"] != kwargs["S3KeyPrefix"]
                    or trail["S3BucketName"] != kwargs["S3BucketName"]
                ):
                    act.error("wrong configuration")
                    cloudtrail.update_trail(**kwargs)
                status = cloudtrail.get_trail_status(Name=name)
                if not status["IsLogging"]:
                    act.error("was not active")
                    cloudtrail.start_logging(Name=name)
    else:
        # no trail with the expected name: remove whatever exists, then create it
        if trails:
            for trail in trails:
                delname = trail.get("Name")
                with Action("[{}] Deleting invalid trail {}..".format(region, delname)):
                    if not dry_run:
                        cloudtrail.stop_logging(Name=delname)
                        cloudtrail.delete_trail(Name=delname)
        with Action("[{}] Enabling CloudTrail..".format(region)):
            if not dry_run:
                cloudtrail.create_trail(**kwargs)
                cloudtrail.start_logging(Name=name)
def saml_login(user, url):
    """Authenticate against ``url`` with password and one-time password until it succeeds.

    The password is first taken from the keyring and saved back to it after a
    successful login; the one-time password is asked for on every attempt.
    Returns the ``(saml_xml, roles)`` pair.
    """
    ring_user = "******".format(user, url)
    saml_password = keyring.get_password("mai", ring_user)

    saml_xml = None
    while not saml_xml:
        saml_password = saml_password or click.prompt("Please enter your SAML password", hide_input=True)
        saml_otp = click.prompt("Please enter your One-Time-Password (eg. Google Authenticate or Yubikey)")

        with Action("Authenticating against {url}..", url=url) as act:
            try:
                auth_result = authenticate(url, user, saml_password, saml_otp)
            except aws_saml_login.saml.AuthenticationFailed:
                act.error("Authentication Failed")
                info("Please check your username/password/token and try again.")
                # forget both credentials so they are requested again
                saml_password = None
                saml_otp = None
            else:
                saml_xml, roles = auth_result

    keyring.set_password("mai", ring_user, saml_password)
    return saml_xml, roles
def all_stacks_in_final_state(related_stacks_refs: list, region: str, timeout: Optional[int], interval: int):
    '''
    Wait until all related stacks are in a final state, then yield once so the
    caller's code block can run.

    Without a timeout (``None`` or below 1) nothing is checked and the
    generator yields immediately. Otherwise the stacks are polled until none
    of them has a status ending in ``_IN_PROGRESS`` or the timeout passes; the
    process exits with status 1 when no stack is found or the stacks are still
    busy at the deadline.

    NOTE(review): yields exactly once, so it is presumably wrapped with
    ``contextlib.contextmanager`` outside this block; confirm.

    :param related_stacks_refs: Related stacks to wait for
    :param region: region where stacks are present
    :param timeout: how long to wait in seconds, or `None` to skip waiting
    :param interval: seconds to sleep after seeing a stack that is still in progress
    '''
    if timeout is None or timeout < 1:
        yield
        return

    wait_timeout = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)

    all_in_final_state = False
    while not all_in_final_state and wait_timeout > datetime.datetime.utcnow():
        # assume all stacks are ready
        all_in_final_state = True
        related_stacks = list(get_stacks(related_stacks_refs, region))

        if not related_stacks:
            error("Stack not found!")
            exit(1)

        for related_stack in related_stacks:
            current_stack_status = related_stack.StackStatus
            if current_stack_status.endswith('_IN_PROGRESS'):
                # some operation in progress, let's wait some time to try again
                all_in_final_state = False
                info(
                    "Waiting for stack {} ({}) to perform requested operation..".format(
                        related_stack.StackName, current_stack_status))
                time.sleep(interval)

    # Decide on what the last poll observed, not on a fresh clock reading:
    # stacks that became ready on the final poll must not count as a timeout.
    if not all_in_final_state:
        info("Timeout reached, requested operation not executed.")
        exit(1)
    else:
        yield
def create(definition: dict, version: str, parameter: tuple,
           region: str,
           disable_rollback: bool,
           dry_run: bool,
           force: bool,
           tag: List[str],
           timeout: int,
           keep_stacks: Optional[int],
           traffic: int,
           verbose: bool,
           remote: str,
           parameter_file: Optional[str]
           ):
    """
    Create a new Cloud Formation stack from the given Senza definition file

    After the stack is requested (and unless ``dry_run`` is set) this waits
    for the deployment to finish, optionally requests a traffic change and,
    when ``keep_stacks`` is given, deletes older stacks so that only the
    ``keep_stacks + 1`` most recently created ones remain. Stack deletion is
    retried every 5 seconds until ``timeout`` seconds have passed.

    The process exits with status 0 after a dry run and with status 1 when
    the existing stacks cannot be fetched.
    """
    lizzy = setup_lizzy_client(remote)
    parameter = list(parameter)
    if parameter_file:
        parameter.extend(read_parameter_file(parameter_file))

    if not force:  # pragma: no cover
        # supporting artifact checking would imply copying a large amount of code
        # from senza, so it should be considered out of scope until senza
        # and lizzy client are merged
        warning("WARNING: "
                "Artifact checking is still not supported by lizzy-client.")

    with Action('Requesting new stack..') as action:
        new_stack, output = lizzy.new_stack(keep_stacks, traffic,
                                            definition, version,
                                            disable_rollback, parameter,
                                            region=region,
                                            dry_run=dry_run,
                                            tags=tag)

    stack_id = '{stack_name}-{version}'.format_map(new_stack)
    print(output)

    info('Stack ID: {}'.format(stack_id))

    if dry_run:
        info("Post deployment steps skipped")
        exit(0)

    with Action('Waiting for new stack...') as action:
        if verbose:
            print()  # ensure that new states will not be printed on the same line as the action

        last_state = None
        for state in lizzy.wait_for_deployment(stack_id, region=region):
            if state != last_state and verbose:
                click.echo(' {}'.format(state))
            else:
                action.progress()
            last_state = state

        # TODO be prepared to handle all final AWS CF states
        if last_state == 'ROLLBACK_COMPLETE':
            fatal_error(
                'Stack was rollback after deployment. '
                'Check your application log for possible reasons.')
        elif last_state != 'CREATE_COMPLETE':
            fatal_error('Deployment failed: {}'.format(last_state))

    info('Deployment Successful')

    if traffic is not None:
        with Action('Requesting traffic change..'):
            try:
                lizzy.traffic(stack_id, traffic, region=region)
            except requests.ConnectionError as e:
                connection_error(e, fatal=False)
            except requests.HTTPError as e:
                agent_error(e, fatal=False)

    # TODO unit test this
    if keep_stacks is not None:
        # the freshly created stack is kept in addition to keep_stacks older ones
        versions_to_keep = keep_stacks + 1
        stacks_to_remove_counter = 1
        end_time = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)
        while stacks_to_remove_counter > 0 and datetime.datetime.utcnow() <= end_time:
            try:
                all_stacks = lizzy.get_stacks([new_stack['stack_name']],
                                              region=region)
            except requests.ConnectionError as e:
                connection_error(e, fatal=False)
                error("Failed to fetch old stacks. "
                      "Old stacks WILL NOT BE DELETED")
                exit(1)
            except requests.HTTPError as e:
                agent_error(e, fatal=False)
                error("Failed to fetch old stacks. "
                      "Old stacks WILL NOT BE DELETED")
                exit(1)
            else:
                sorted_stacks = sorted(all_stacks,
                                       key=lambda stack: stack['creation_time'])
                stacks_to_remove = sorted_stacks[:-versions_to_keep]
                stacks_to_remove_counter = len(stacks_to_remove)
                with Action('Deleting old stacks..'):
                    print()
                    for old_stack in stacks_to_remove:
                        old_stack_id = '{stack_name}-{version}'.format_map(
                            old_stack)
                        if old_stack['status'] in COMPLETE_STATES:
                            click.echo(' {}'.format(old_stack_id))
                            try:
                                lizzy.delete(old_stack_id, region=region)
                                stacks_to_remove_counter -= 1
                            except requests.ConnectionError as e:
                                connection_error(e, fatal=False)
                            except requests.HTTPError as e:
                                agent_error(e, fatal=False)
                        else:
                            click.echo(' > {} current status is {} trying '
                                       'again later'.format(old_stack_id,
                                                            old_stack['status']))
                # anything left over is retried after a short pause
                if stacks_to_remove_counter > 0:
                    time.sleep(5)

        if datetime.datetime.utcnow() > end_time:
            click.echo('Timeout waiting for related stacks to be ready.')