def create(obj, profile_name, url, user):
    '''Create a new profile'''
    if not url.startswith('http'):
        url = 'https://{}'.format(url)
    saml_xml, roles = saml_login(user, url)
    if not roles:
        error('No roles found')
        exit(1)
    if len(roles) == 1:
        role = roles[0]
        # role has no name of its own: reuse the profile name as its label
        if role[2] is None:
            role = (role[0], role[1], profile_name)
    else:
        role = choice('Please select one role',
                      [(r, get_role_label(r)) for r in sorted(roles)])
    profiles = obj['config'] or {}
    profiles[profile_name] = {
        'saml_identity_provider_url': url,
        'saml_role': role,
        'saml_user': user
    }
    path = obj['config-file']
    with Action('Storing new profile in {}..'.format(path)):
        os.makedirs(obj['config-dir'], exist_ok=True)
        with open(path, 'w') as fd:
            yaml.safe_dump(profiles, fd)
def delete(config, type, resource, execute):
    '''Delete a Kubernetes resource or Cloud Formation stack'''
    if type == 'kubernetes':
        parts = resource.split('/')
        if len(parts) != 2:
            error('Kubernetes resource must be KIND/NAME')
            raise click.Abort()
        kind, name = parts
        info('Deleting Kubernetes {} {}..'.format(kind, name))
        cluster_id = config.get('kubernetes_cluster')
        namespace = config.get('kubernetes_namespace')
        path = '/kubernetes-clusters/{}/namespaces/{}/{}/{}'.format(
            cluster_id, namespace, kind, name)
    else:
        info('Deleting Cloud Formation stack {}..'.format(resource))
        aws_account = config.get('aws_account')
        aws_region = config.get('aws_region')
        path = '/aws-accounts/{}/regions/{}/cloudformation-stacks/{}'.format(
            aws_account, aws_region, resource)
    # file a change request; only execute it when explicitly asked to
    response = request(config, requests.delete, path)
    change_request_id = response.json()['id']
    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
def output(output):
    '''Example for all possible Echo Formats

    You see the message only, if the Output TEXT
    '''
    with OutputFormat(output):
        action('This is a ok:')
        ok()
        action('This is a ok with message:')
        ok('all is fine')
        action('This is a warning:')
        warning('please check this')
        with Action('Start with working..') as act:
            # save_the_world()
            act.progress()
            act.progress()
            act.progress()
            act.progress()
        print_table('id name'.split(),
                    [{'id': 1, 'name': 'Test #1'},
                     {'id': 2, 'name': 'Test #2'}])
        info('Only FYI')
        action('This is a error:')
        error('this is wrong, please fix')
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        # fatal_error exits, so this line is never reached
        info('I\'am not printed, the process a dead')
def push_entity(ctx, entity):
    '''Create/update entities from a JSON file, a YAML file, or an inline JSON string.

    *entity* is either a path ending in "json"/"yaml" or a raw JSON document;
    a single object is wrapped into a one-element list before posting.
    '''
    if entity[-4:] == "json" and os.path.exists(entity):
        with open(entity, 'rb') as file:
            entity = file.read()
        data = json.loads(entity.decode())
    elif entity[-4:] == 'yaml' and os.path.exists(entity):
        with open(entity, 'rb') as fd:
            data = yaml.safe_load(fd)
    else:
        data = json.loads(entity)
    if not isinstance(data, list):
        data = [data]
    for e in data:
        action("creating entity...{}".format(e['id']))
        try:
            entity = json.dumps(e)
            r = put('/entities/', entity)
            if r.status_code == 200:
                ok()
            else:
                # BUG FIX: error() was called without a message, which raises
                # TypeError in clickclick; report the failing status instead.
                error('HTTP status {}'.format(r.status_code))
        except Exception:
            # BUG FIX: bare "except:" also swallowed SystemExit/KeyboardInterrupt
            error("failed")
def gather_user_variables(variables, region):
    '''Interactively collect the Senza template variables for a Spilo stack.'''
    prompt(variables, 'wal_s3_bucket', 'Postgres WAL S3 bucket to use', default='zalando-spilo-app')
    prompt(variables, 'instance_type', 'EC2 instance type', default='t2.micro')
    prompt(variables, 'hosted_zone', 'Hosted Zone', default=get_default_zone(region) or 'example.com')
    # a hosted zone must be a fully qualified domain name (trailing dot)
    if variables['hosted_zone'][-1:] != '.':
        variables['hosted_zone'] += '.'
    prompt(variables, 'discovery_url', 'ETCD Discovery URL',
           default='postgres.' + variables['hosted_zone'][:-1])
    variables['postgres_port'] = POSTGRES_PORT
    variables['healthcheck_port'] = HEALTHCHECK_PORT
    sg_name = 'app-spilo'
    variables['spilo_sg_id'] = get_security_group(region, sg_name).id
    rules_missing = check_security_group(
        sg_name,
        [('tcp', 22), ('tcp', POSTGRES_PORT), ('tcp', HEALTHCHECK_PORT)],
        region, allow_from_self=True)
    if ('tcp', 22) in rules_missing:
        warning('Security group {} does not allow SSH access, you will not be able to ssh into your servers'.format(
            sg_name))
    if ('tcp', POSTGRES_PORT) in rules_missing:
        error('Security group {} does not allow inbound TCP traffic on the default postgres port ({})'.format(
            sg_name, POSTGRES_PORT
        ))
    if ('tcp', HEALTHCHECK_PORT) in rules_missing:
        error('Security group {} does not allow inbound TCP traffic on the default health check port ({})'.format(
            sg_name, HEALTHCHECK_PORT
        ))
    check_s3_bucket(variables['wal_s3_bucket'], region)
    return variables
def traffic(config, application, release, percent, execute):
    '''Show or adjust ingress traffic weights for an application release.'''
    cluster_id = config.get('kubernetes_cluster')
    namespace = config.get('kubernetes_namespace')
    ingress = kubectl_get(namespace, 'ingresses', application)
    # without release/percent we only report the current backend weights
    if release is None and percent is None:
        print(json.dumps(get_ingress_backends(ingress)))
        return
    backend = '{}-{}'.format(application, release)
    backend_weights = calculate_backend_weights(ingress, backend, percent)
    if len(backend_weights) == 0:
        error('Failed to find ingress backends {}'.format(backend))
        raise click.Abort()
    # update ingress resource
    resources_update = ResourcesUpdate()
    resources_update.set_annotation(application,
                                    # ~1 == / in json patch
                                    INGRESS_BACKEND_WEIGHT_ANNOTATION_KEY.replace('/', '~1'),
                                    json.dumps(backend_weights),
                                    'ingresses')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(cluster_id, namespace)
    response = request(config, requests.patch, path, json=resources_update.to_dict())
    change_request_id = response.json()['id']
    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
def get_config_data(config_file=DEFAULT_CONFIG_FILE):
    '''Load the CLI configuration from *config_file*, prompting for and storing
    the base URL and username when no configuration file exists yet.

    Returns the validated configuration dict.
    '''
    fn = os.path.expanduser(config_file)
    data = {}
    try:
        if os.path.exists(fn):
            with open(fn) as fd:
                # BUG FIX: yaml.safe_load returns None for an empty file,
                # which would previously be passed on to validate_config
                data = yaml.safe_load(fd) or {}
        else:
            clickclick.warning(
                'No configuration file found at [{}]'.format(config_file))
            data['url'] = click.prompt(
                'ZMON Base URL (e.g. https://zmon.example.org/api/v1)')
            # TODO: either ask for fixed token or Zign
            data['user'] = click.prompt('ZMON username', default=os.environ['USER'])
            with open(fn, mode='w') as fd:
                yaml.dump(data, fd, default_flow_style=False,
                          allow_unicode=True, encoding='utf-8')
    except Exception as e:
        error(e)
    return validate_config(data)
def print_problem(error):
    '''Pretty-print an HTTP "problem" JSON body from a failed response.'''
    problem = error.response.json()
    clickclick.error('Failed.')
    # render each problem field on its own line
    for label, key in (('Status', 'status'), ('Title', 'title'),
                       ('Details', 'detail'), ('FlowId', 'flow_id')):
        clickclick.error('{}: {}'.format(label, problem[key]))
def output(output):
    '''Example for all possible Echo Formats

    You see the message only, if the Output TEXT
    '''
    with OutputFormat(output):
        action('This is a ok:')
        ok()
        action('This is a ok with message:')
        ok('all is fine')
        action('This is a warning:')
        warning('please check this')
        with Action('Start with working..') as act:
            # save_the_world()
            act.progress()
            act.progress()
            act.progress()
            act.progress()
        rows = [{'id': 1, 'name': 'Test #1'},
                {'id': 2, 'name': 'Test #2'}]
        print_table('id name'.split(), rows)
        info('Only FYI')
        action('This is a error:')
        error('this is wrong, please fix')
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        # fatal_error exits, so this is never printed
        info('I\'am not printed, the process a dead')
def login(obj, url, realm, name, user, password):
    '''Login to Pier One Docker registry (generates ~/.dockercfg'''
    config = obj
    url = url or config.get('url')
    user = user or os.getenv('USER')
    # keep prompting until we have a registry URL that answers
    while not url:
        url = click.prompt('Please enter the Pier One URL')
        if not url.startswith('http'):
            url = 'https://{}'.format(url)
        try:
            requests.get(url, timeout=5)
        except requests.RequestException:
            # BUG FIX: bare "except:" also caught KeyboardInterrupt,
            # making the prompt loop impossible to interrupt
            error('Could not reach {}'.format(url))
            url = None
    config['url'] = url
    os.makedirs(CONFIG_DIR_PATH, exist_ok=True)
    with open(CONFIG_FILE_PATH, 'w') as fd:
        yaml.dump(config, fd)
    docker_login(url, realm, name, user, password, prompt=True)
def add_phone(ctx, member_email, phone_nr):
    '''Attach a phone number to a group member.'''
    action("Adding phone ....")
    resp = put("/groups/{}/phone/{}/".format(member_email, phone_nr))
    # the API answers '1' on success
    if resp.text == '1':
        ok()
    else:
        error("failed to set phone")
def delete_old_services(config, application, version, release, execute):
    '''Delete old releases'''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)
    data = kubectl_get(namespace, 'services', '-l', 'application={}'.format(application))
    services = data['items']
    target_service_name = '{}-{}'.format(application, release)
    # newest first, like the original listing order
    names = [s['metadata']['name']
             for s in sorted(services, key=lambda d: d['metadata']['name'], reverse=True)]
    if target_service_name not in names:
        error('Service {} was not found.'.format(target_service_name))
        raise click.Abort()
    for service_name in names:
        if service_name == target_service_name:
            continue  # keep the target release
        info('Deleting service {}..'.format(service_name))
        cluster_id = config.get('kubernetes_cluster')
        namespace = config.get('kubernetes_namespace')
        path = '/kubernetes-clusters/{}/namespaces/{}/services/{}'.format(
            cluster_id, namespace, service_name)
        response = request(config, requests.delete, path)
        change_request_id = response.json()['id']
        if execute:
            approve_and_execute(config, change_request_id)
        else:
            print(change_request_id)
def remove_phone(ctx, member_email, phone_nr):
    '''Detach a phone number from a group member.'''
    action("Removing phone number ....")
    resp = delete("/groups/{}/phone/{}/".format(member_email, phone_nr))
    # the API answers '1' on success
    if resp.text == '1':
        ok()
    else:
        error("failed to remove phone")
def gather_user_variables(variables, region):
    '''Interactively collect the Senza variables for a web application stack
    and warn about missing security-group rules.'''
    prompt(variables, 'application_id', 'Application ID', default='hello-world')
    prompt(variables, 'docker_image',
           'Docker image without tag/version (e.g. "pierone.example.org/myteam/myapp")',
           default='stups/hello-world')
    prompt(variables, 'http_port', 'HTTP port', default=8080, type=int)
    prompt(variables, 'http_health_check_path', 'HTTP health check path', default='/')
    prompt(variables, 'instance_type', 'EC2 instance type', default='t2.micro')
    prompt(variables, 'mint_bucket', 'Mint S3 bucket name',
           default=lambda: get_mint_bucket_name(region))
    http_port = variables['http_port']
    sg_name = 'app-{}'.format(variables['application_id'])
    rules_missing = check_security_group(sg_name, [('tcp', 22), ('tcp', http_port)],
                                         region, allow_from_self=True)
    if ('tcp', 22) in rules_missing:
        warning('Security group {} does not allow SSH access, you will not be able to ssh into your servers'.format(
            sg_name))
    if ('tcp', http_port) in rules_missing:
        error('Security group {} does not allow inbound TCP traffic on the specified HTTP port ({})'.format(
            sg_name, http_port
        ))
    rules_missing = check_security_group(sg_name + '-lb', [('tcp', 443)], region)
    if rules_missing:
        # BUG FIX: the message previously named "app-X" although the group
        # actually checked is "app-X-lb"
        error('Load balancer security group {} does not allow inbound HTTPS traffic'.format(sg_name + '-lb'))
    check_iam_role(variables['application_id'], variables['mint_bucket'], region)
    return variables
def group_remove(ctx, group_name, user_name):
    '''Remove a user from a group.'''
    action("Removing user ....")
    resp = delete("/groups/{}/member/{}/".format(group_name, user_name))
    # the API answers '1' on success
    if resp.text == '1':
        ok()
    else:
        error("failed to remove")
def __call__(self, *args, **kwargs):
    '''Invoke the wrapped function, translating known failure modes into
    user-friendly messages and exit codes.'''
    try:
        self.function(*args, **kwargs)
    except NoCredentialsError:
        self.die_credential_error()
    except ClientError as e:
        sys.stdout.flush()
        if is_credentials_expired_error(e):
            print('AWS credentials have expired.\n'
                  'Use the "mai" command line tool to get a new'
                  ' temporary access key.',
                  file=sys.stderr)
            sys.exit(1)
        elif is_access_denied_error(e):
            self.die_credential_error()
        else:
            self.die_unknown_error(e)
    except yaml.constructor.ConstructorError as e:
        print("Error parsing definition file:")
        print(e)
        if e.problem == "found unhashable key":
            print("Please quote all variable values")
        sys.exit(1)
    except PiuNotFound as e:
        error(e)
        print("You can install piu with the following command:", file=sys.stderr)
        print("sudo pip3 install --upgrade stups-piu", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Catch All: top-level boundary, report whatever slipped through
        self.die_unknown_error(e)
def group_add(ctx, group_name, user_name):
    '''Add a user to a group.'''
    action("Adding user ....")
    resp = put("/groups/{}/member/{}/".format(group_name, user_name))
    # the API answers '1' on success
    if resp.text == '1':
        ok()
    else:
        error("failed to insert")
def updateAlertDef(yaml_file):
    """update a single check definition"""
    data = get_config_data()
    alert_def = yaml.safe_load(yaml_file)
    alert_def['last_modified_by'] = data['user']
    if 'status' not in alert_def:
        alert_def['status'] = 'ACTIVE'
    action('Updating alert definition..')
    # both ids are mandatory for an update
    if 'id' not in alert_def:
        error('"id" missing in definition')
        return
    if 'check_definition_id' not in alert_def:
        error('"check_definition_id" missing in definition')
        return
    alert_id = alert_def['id']
    r = requests.put(data['url'] + '/alert-definitions/{}'.format(alert_id),
                     json.dumps(alert_def),
                     auth=HTTPBasicAuth(data['user'], data['password']),
                     headers={'Content-Type': 'application/json'})
    if r.status_code == 200:
        ok(get_config_data()["url"].replace("rest/api/v1", "") +
           "#/alert-details/" + str(r.json()["id"]))
    else:
        print(r.text)
def get_config_data():
    '''Load the zmon-cli configuration, migrating any plaintext password into
    the system keyring, and prompt for URL/username on first run.

    Returns the validated configuration dict.
    '''
    fn = os.path.expanduser(DEFAULT_CONFIG_FILE)
    data = {}
    try:
        if os.path.exists(fn):
            with open(fn) as fd:
                # BUG FIX: yaml.safe_load returns None for an empty file,
                # which made the 'password' membership test below raise TypeError
                data = yaml.safe_load(fd) or {}
            # migrate a stored plaintext password into the keyring
            if 'password' in data:
                keyring.set_password("zmon-cli", data['user'], data['password'])
                del data['password']
                with open(fn, mode='w') as fd:
                    yaml.dump(data, fd, default_flow_style=False,
                              allow_unicode=True, encoding='utf-8')
        else:
            clickclick.warning("No configuration file found at [{}]".format(DEFAULT_CONFIG_FILE))
            data['url'] = click.prompt("ZMon Base URL (e.g. https://zmon2.local/rest/api/v1)")
            data['user'] = click.prompt("ZMon username", default=os.environ['USER'])
            with open(fn, mode='w') as fd:
                yaml.dump(data, fd, default_flow_style=False,
                          allow_unicode=True, encoding='utf-8')
    except Exception as e:
        error(e)
    return validate_config(data)
def create_all(obj, url, user):
    """Create for all roles a new own profile"""
    if not url.startswith("http"):
        url = "https://{}".format(url)
    saml_xml, roles = saml_login(user, url)
    if not roles:
        error("No roles found")
        exit(1)
    data = obj["config"] or {}
    # a single unnamed role gets the profile name "default"
    if len(roles) == 1 and roles[0][2] is None:
        roles = [(roles[0][0], roles[0][1], "default")]
    for r in sorted(roles):
        provider_arn, role_arn, name = r
        name = name or "unknown"  # name is sometimes missing
        profile_name = "{}-{}".format(name.split("-", maxsplit=1)[-1],
                                      role_arn.split("-", maxsplit=1)[-1])
        data[profile_name] = {"saml_identity_provider_url": url,
                              "saml_role": r,
                              "saml_user": user}
    path = obj["config-file"]
    with Action("Storing new profile in {}..".format(path)):
        os.makedirs(obj["config-dir"], exist_ok=True)
        with open(path, "w") as fd:
            yaml.safe_dump(data, fd)
def create(obj, profile_name, url, user):
    """Create a new profile"""
    if not url.startswith("http"):
        url = "https://{}".format(url)
    saml_xml, roles = saml_login(user, url)
    if not roles:
        error("No roles found")
        exit(1)
    if len(roles) == 1:
        role = roles[0]
        # unnamed role: reuse the profile name as its label
        if role[2] is None:
            role = (role[0], role[1], profile_name)
    else:
        role = choice("Please select one role",
                      [(r, get_role_label(r)) for r in sorted(roles)])
    profiles = obj["config"] or {}
    profiles[profile_name] = {"saml_identity_provider_url": url,
                              "saml_role": role,
                              "saml_user": user}
    path = obj["config-file"]
    with Action("Storing new profile in {}..".format(path)):
        os.makedirs(obj["config-dir"], exist_ok=True)
        with open(path, "w") as fd:
            yaml.safe_dump(profiles, fd)
def get_named_token(scope, realm, name, user, password, url=None,
                    insecure=False, refresh=False, use_keyring=True):
    '''Obtain an OAuth access token, reusing a still-valid named token when
    possible and caching the new token under *name*.'''
    if name and not refresh:
        existing_token = get_existing_token(name)
        if existing_token:
            return existing_token
    config = get_config()
    url = url or config.get('url')
    while not url:
        url = click.prompt('Please enter the OAuth access token service URL')
        if not url.startswith('http'):
            url = 'https://{}'.format(url)
        try:
            requests.get(url, timeout=5, verify=not insecure)
        except requests.RequestException:
            # BUG FIX: bare "except:" also caught KeyboardInterrupt,
            # making the prompt loop impossible to interrupt
            error('Could not reach {}'.format(url))
            url = None
    config['url'] = url
    os.makedirs(CONFIG_DIR_PATH, exist_ok=True)
    with open(CONFIG_FILE_PATH, 'w') as fd:
        yaml.dump(config, fd)
    password = password or keyring.get_password(KEYRING_KEY, user)
    if not password:
        password = click.prompt('Password', hide_input=True)
    result = get_new_token(realm, scope, user, password, url=url, insecure=insecure)
    if result and use_keyring:
        keyring.set_password(KEYRING_KEY, user, password)
    access_token = result.get('access_token')
    if not access_token:
        raise click.UsageError(yaml.safe_dump(result))
    if name:
        try:
            with open(TOKENS_FILE_PATH) as fd:
                data = yaml.safe_load(fd)
        except (OSError, yaml.YAMLError):
            # BUG FIX: narrowed a bare "except:"; a missing or corrupt
            # token file simply means there is no cache yet
            data = None
        if not data:
            data = {}
        data[name] = result
        data[name]['creation_time'] = time.time()
        with open(TOKENS_FILE_PATH, 'w') as fd:
            yaml.safe_dump(data, fd)
    return result
def connection_error(e: requests.ConnectionError, fatal=True):
    '''Pretty-print a requests connection error; abort when *fatal*.'''
    reason = e.args[0].reason  # type: requests.packages.urllib3.exceptions.NewConnectionError
    # urllib3 reasons look like "<class>: <human readable part>"
    _, pretty_reason = str(reason).split(':', 1)
    msg = ' {}'.format(pretty_reason)
    reporter = fatal_error if fatal else error
    reporter(msg)
def switch_active(ctx, group_name, user_name):
    '''Make *user_name* the active member of *group_name*.'''
    action("Switching active user ....")
    # NOTE(review): the DELETE result is not checked — only the PUT below is
    delete("/groups/{}/active/".format(group_name))
    resp = put("/groups/{}/active/{}/".format(group_name, user_name))
    if resp.text == '1':
        ok()
    else:
        error("failed to switch")
def delete(url):
    '''DELETE *url*; on 401, re-query the password and retry.'''
    data = get_config_data()
    response = request(requests.delete, url)
    if response.status_code == 401:
        clickclick.error("Authorization failed")
        data['password'] = query_password(data['user'])
        # retry with the freshly entered password
        return delete(url)
    response.raise_for_status()
    return response
def gather_user_variables(variables, region):
    '''Interactively collect the Senza variables for a web application stack,
    optionally skipping the Mint bucket, and warn about missing SG rules.'''
    # maximal 32 characters because of the loadbalancer-name
    prompt(variables, 'application_id', 'Application ID', default='hello-world',
           value_proc=check_value(32, '^[a-zA-Z][-a-zA-Z0-9]*$'))
    prompt(variables, 'docker_image',
           'Docker image without tag/version (e.g. "pierone.example.org/myteam/myapp")',
           default='stups/hello-world')
    prompt(variables, 'http_port', 'HTTP port', default=8080, type=int)
    prompt(variables, 'http_health_check_path', 'HTTP health check path', default='/')
    prompt(variables, 'instance_type', 'EC2 instance type', default='t2.micro')
    if 'pierone' in variables['docker_image'] or confirm(
            'Did you need OAuth-Credentials from Mint?'):
        prompt(variables, 'mint_bucket', 'Mint S3 bucket name',
               default=lambda: get_mint_bucket_name(region))
    else:
        variables['mint_bucket'] = None
    http_port = variables['http_port']
    sg_name = 'app-{}'.format(variables['application_id'])
    rules_missing = check_security_group(sg_name, [('tcp', 22), ('tcp', http_port)],
                                         region, allow_from_self=True)
    if ('tcp', 22) in rules_missing:
        warning(
            'Security group {} does not allow SSH access, you will not be able to ssh into your servers'
            .format(sg_name))
    if ('tcp', http_port) in rules_missing:
        error(
            'Security group {} does not allow inbound TCP traffic on the specified HTTP port ({})'
            .format(sg_name, http_port))
    rules_missing = check_security_group(sg_name + '-lb', [('tcp', 443)], region)
    if rules_missing:
        # BUG FIX: the message previously named "app-X" although the group
        # actually checked is "app-X-lb"
        error(
            'Load balancer security group {} does not allow inbound HTTPS traffic'
            .format(sg_name + '-lb'))
    check_iam_role(variables['application_id'], variables['mint_bucket'], region)
    return variables
def delete_check_definition(check_id):
    '''Delete an orphan check definition'''
    action('delete check id {} ...'.format(check_id))
    resp = delete('/check-definitions/{}'.format(check_id))
    if resp.status_code == 200:
        ok()
    else:
        error(resp.text)
def post(url, body):
    '''POST *body* (JSON) to *url*; on 401, re-query the password and retry.'''
    data = get_config_data()
    response = request(requests.post, url, data=body,
                       headers={'content-type': 'application/json'})
    if response.status_code == 401:
        clickclick.error("Authorization failed")
        data['password'] = query_password(data['user'])
        # BUG FIX: the retry previously called get(url), silently turning the
        # failed POST into a GET and dropping the request body
        return post(url, body)
    response.raise_for_status()
    return response
def check_redis_host(host, port=6379):
    '''Connect to Redis and fetch the registered worker set.

    Returns (client, workers) on success.
    NOTE(review): on failure only the error is printed and None is returned
    implicitly — callers that unpack the result will then fail.
    '''
    action("Check Redis on {}".format(host))
    action("...")
    try:
        client = StrictRedis(host, port)
        workers = client.smembers("zmon:metrics")
        ok()
        return client, workers
    except Exception as e:
        error(e)
def get_entity(ctx, entity_id):
    '''Fetch a single entity by id and print it as YAML.'''
    try:
        resp = get('/entities/{}/'.format(urllib.parse.quote_plus(entity_id)))
        if resp.status_code == 200 and resp.text != "":
            print(dump_yaml(resp.json()))
        else:
            action("getting entity " + entity_id + "...")
            error("not found")
    except Exception as ex:
        error("Exception during get entity: " + str(ex))
def delete_entity(ctx, entity_id):
    '''Delete a single entity by id; the API answers "1" on success.'''
    action("delete entity... {}".format(entity_id))
    try:
        r = delete('/entities/?id={}'.format(urllib.parse.quote_plus(entity_id)))
        if r.status_code == 200 and r.text == "1":
            ok()
        else:
            # BUG FIX: typo in user-facing message ("unsuccessfull")
            error("Delete unsuccessful")
    except Exception as ex:
        error("Exception during delete: " + str(ex))
def gather_user_variables(variables, region):
    '''Interactively collect all Senza variables for a Spilo (Postgres) stack,
    including optional EBS storage settings and security-group checks.'''
    if click.confirm('Do you want to set the docker image now? [No]'):
        prompt(variables, "docker_image", "Docker Image Version", default=get_latest_spilo_image())
    prompt(variables, 'wal_s3_bucket', 'Postgres WAL S3 bucket to use', default='zalando-spilo-app')
    prompt(variables, 'instance_type', 'EC2 instance type', default='t2.micro')
    prompt(variables, 'hosted_zone', 'Hosted Zone', default=get_default_zone(region) or 'example.com')
    # a hosted zone must be a fully qualified domain name (trailing dot)
    if variables['hosted_zone'][-1:] != '.':
        variables['hosted_zone'] += '.'
    prompt(variables, 'discovery_domain', 'ETCD Discovery Domain',
           default='postgres.' + variables['hosted_zone'][:-1])
    # instance families with instance-store disks may opt out of EBS
    if variables['instance_type'].lower().split('.')[0] in ('c3', 'g2', 'hi1', 'i2', 'm3', 'r3'):
        variables['use_ebs'] = click.confirm(
            'Do you want database data directory on external (EBS) storage? [Yes]', default=True)
    else:
        variables['use_ebs'] = True
    if variables['use_ebs']:
        prompt(variables, 'volume_size', 'Database volume size (GB, 10 or more)', default=10)
        prompt(variables, 'volume_type', 'Database volume type (gp2, io1 or standard)', default='gp2')
        if variables['volume_type'] == 'io1':
            # AWS allows up to 30 IOPS per provisioned GB
            pio_max = variables['volume_size'] * 30
            prompt(variables, "volume_iops",
                   'Provisioned I/O operations per second (100 - {0})'.format(pio_max),
                   default=str(pio_max))
        prompt(variables, "snapshot_id", "ID of the snapshot to populate EBS volume from", default="")
    if ebs_optimized_supported(variables['instance_type']):
        variables['ebs_optimized'] = True
    prompt(variables, "fstype", "Filesystem for the data partition", default="ext4")
    prompt(variables, "fsoptions", "Filesystem mount options (comma-separated)",
           default="noatime,nodiratime,nobarrier")
    prompt(variables, "scalyr_account_key", "Account key for your scalyr account", "")
    variables['postgres_port'] = POSTGRES_PORT
    variables['healthcheck_port'] = HEALTHCHECK_PORT
    sg_name = 'app-spilo'
    rules_missing = check_security_group(
        sg_name,
        [('tcp', 22), ('tcp', POSTGRES_PORT), ('tcp', HEALTHCHECK_PORT)],
        region, allow_from_self=True)
    if ('tcp', 22) in rules_missing:
        warning('Security group {} does not allow SSH access, you will not be able to ssh into your servers'.
                format(sg_name))
    if ('tcp', POSTGRES_PORT) in rules_missing:
        error('Security group {} does not allow inbound TCP traffic on the default postgres port ({})'.format(
            sg_name, POSTGRES_PORT
        ))
    if ('tcp', HEALTHCHECK_PORT) in rules_missing:
        error('Security group {} does not allow inbound TCP traffic on the default health check port ({})'.
              format(sg_name, HEALTHCHECK_PORT))
    variables['spilo_sg_id'] = get_security_group(region, sg_name).id
    check_s3_bucket(variables['wal_s3_bucket'], region)
    return variables
def find_latest_docker_image_version(image):
    '''Resolve the "latest" tag of a Pier One Docker image to a concrete tag.'''
    docker_image = pierone.api.DockerImage.parse(image)
    if not docker_image.registry:
        error('Could not resolve "latest" tag for {}: missing registry.'.format(image))
        exit(2)
    token = zign.api.get_token('uid', ['uid'])
    latest_tag = pierone.api.get_latest_tag(docker_image, token)
    if not latest_tag:
        error('Could not resolve "latest" tag for {}'.format(image))
        exit(2)
    return latest_tag
def get_api_server_url(cluster_registry_url: str, cluster_id: str):
    '''Look up a cluster's API server URL in the Cluster Registry.'''
    token = zign.api.get_token('kubectl', ['uid'])
    response = requests.get(
        '{}/kubernetes-clusters/{}'.format(cluster_registry_url, cluster_id),
        headers={'Authorization': 'Bearer {}'.format(token)},
        timeout=5)
    if response.status_code == 404:
        error('Kubernetes cluster {} not found in Cluster Registry'.format(cluster_id))
        exit(1)
    response.raise_for_status()
    return response.json().get('api_server_url')
def sli_update(obj, product, sli_name, start, end):
    """Update SLI values"""
    client = get_client(obj)
    # NOTE(review): "start"/"end" appear to be relative offsets, hence the
    # start <= end rejection — confirm against the service's API semantics
    if start and end and start <= end:
        error('Relative "end" should be less than "start"')
        return
    res = client.sli_update(product, sli_name, start=start, end=end)
    print(json.dumps(res, indent=4))
def switch_deployment(config, application, version, release, ratio, execute):
    '''Switch to new release'''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)
    # ratio is "target/total", e.g. "3/4"
    target_replicas, total = (int(part) for part in ratio.split('/'))
    data = kubectl_get(namespace, 'deployments', '-l', 'application={}'.format(application))
    deployments = data['items']
    target_deployment_name = '{}-{}-{}'.format(application, version, release)
    if not any(d['metadata']['name'] == target_deployment_name for d in deployments):
        error("Deployment {} does not exist!".format(target_deployment_name))
        exit(1)
    resources_update = ResourcesUpdate()
    remaining_replicas = total - target_replicas
    for deployment in sorted(deployments, key=lambda d: d['metadata']['name'], reverse=True):
        deployment_name = deployment['metadata']['name']
        if deployment_name == target_deployment_name:
            replicas = target_replicas
        else:
            # maybe spread across all other deployments?
            replicas = remaining_replicas
            remaining_replicas = 0
        info('Scaling deployment {} to {} replicas..'.format(
            deployment_name, replicas))
        resources_update.set_number_of_replicas(deployment_name, replicas)
    cluster_id = config.get('kubernetes_cluster')
    namespace = config.get('kubernetes_namespace')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(
        cluster_id, namespace)
    response = request(config, requests.patch, path, json=resources_update.to_dict())
    change_request_id = response.json()['id']
    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
def configure(args):
    '''Store CLI configuration parsed from --key=value style arguments.'''
    # naive option parsing
    config = {'cluster_registry': None}
    for arg in args:
        if not arg.startswith('--'):
            continue
        key, val = arg.split('=', 1)
        config_key = key[2:].replace('-', '_')
        if config_key not in config:
            error('Unsupported option "{}"'.format(key))
            exit(2)
        config[config_key] = val
    stups_cli.config.store_config(config, APP_NAME)
def get_entity(ctx, entity_id):
    '''Fetch one entity by id and print it as YAML.'''
    try:
        resp = get('/entities/{}/'.format(urllib.parse.quote_plus(entity_id)))
        if resp.status_code == 200 and resp.text != "":
            dumped = yaml.safe_dump(resp.json(), default_flow_style=False,
                                    allow_unicode=True, encoding='utf-8')
            print(dumped.decode('utf-8'))
        else:
            action("getting entity " + entity_id + "...")
            error("not found")
    except Exception as ex:
        error("Exception during get entity: " + str(ex))
def get_named_token(scope, realm, name, user, password, url=None, insecure=False,
                    refresh=False, use_keyring=True, prompt=False):
    '''get named access token, return existing if still valid'''
    if name and not refresh:
        existing_token = get_existing_token(name)
        if existing_token:
            return existing_token
    config = get_config()
    url = url or config.get('url')
    while not url and prompt:
        url = click.prompt('Please enter the OAuth access token service URL')
        if not url.startswith('http'):
            url = 'https://{}'.format(url)
        try:
            requests.get(url, timeout=5, verify=not insecure)
        except requests.RequestException:
            # BUG FIX: bare "except:" also caught KeyboardInterrupt,
            # making the prompt loop impossible to interrupt
            error('Could not reach {}'.format(url))
            url = None
    config['url'] = url
    stups_cli.config.store_config(config, 'zign')
    password = password or keyring.get_password(KEYRING_KEY, user)
    # keep asking for the password until authentication succeeds (interactive only)
    while True:
        if not password and prompt:
            password = click.prompt('Password for {}'.format(user), hide_input=True)
        try:
            result = get_new_token(realm, scope, user, password, url=url, insecure=insecure)
            break
        except AuthenticationFailed as e:
            if prompt:
                error(e)
                info('Please check your username and password and try again.')
                password = None
            else:
                raise
    if result and use_keyring:
        keyring.set_password(KEYRING_KEY, user, password)
    if name:
        store_token(name, result)
    return result
def check_queues(redis):
    '''Report the length of every known ZMON queue and flag backlogs.'''
    queues = ['zmon:queue:default', 'zmon:queue:snmp',
              'zmon:queue:internal', 'zmon:queue:secure']
    for queue in queues:
        action('Checking queue length ... {} ...'.format(queue))
        # BUG FIX: renamed the ambiguous single-letter variable "l" (PEP 8 / E741)
        length = redis.llen(queue)
        action("...")
        highlight("{}".format(length))
        action(" ...")
        # threshold: anything below 2000 queued tasks is considered healthy
        if length < 2000:
            ok()
            continue
        # BUG FIX: typo in user-facing message ("to many" -> "too many")
        error("too many tasks")
def edit_etc_hosts(hosts_file, backup_file, args):
    '''Periodically rewrite *hosts_file* so that the "from" host resolves to
    the current addresses of args.to; restore the original file on exit.'''
    with hosts_file.open() as fd:
        old_contents = fd.read()
    HEADER = '#### Start of entries generated by local-cname'
    # refuse to stack our entries on top of an earlier, unremoved run
    if HEADER in old_contents:
        error('{} seems to have already been modified by local-cname.'.format(
            hosts_file))
        info('Remove the local-cname header line from this file to proceed.')
        sys.exit(1)
    with backup_file.open('w') as fd:
        fd.write(old_contents)
    try:
        while True:
            entries = []
            with Action('Resolving {} ..'.format(args.to)):
                results = socket.getaddrinfo(args.to, 80, type=socket.SOCK_STREAM)
                for family, type, proto, canonname, sockaddr in results:
                    if family in (socket.AF_INET, socket.AF_INET6):
                        # "from" is a keyword, hence getattr
                        entries.append((getattr(args, 'from'), sockaddr[0]))
            info('Current entries:')
            for hostname, ip in entries:
                info('{} -> {}'.format(hostname, ip))
            with Action('Writing {} ..'.format(hosts_file)):
                with hosts_file.open('w') as fd:
                    fd.write(old_contents)
                    fd.write('{}\n'.format(HEADER))
                    for hostname, ip in entries:
                        fd.write('{} {}\n'.format(ip, hostname))
            time.sleep(60)
    except KeyboardInterrupt:
        # ignore, do not print stacktrace
        pass
    finally:
        try:
            backup_file.rename(hosts_file)
        except OSError:
            # rename failed (e.g. cross-device): restore contents manually
            with hosts_file.open('w') as fd:
                fd.write(old_contents)
            os.remove(backup_file)
def check_schedulers(r, schedulers):
    '''Verify that each scheduler has reported a loop heartbeat recently.'''
    for s in schedulers:
        action('Check scheduler {} .....'.format(s[2:]))
        try:
            ts = r.get("zmon:metrics:{}:ts".format(s))
            if ts is None:
                error("No scheduling loop registered ( running/stuck? )")
                continue
            # seconds since the scheduler's last reported loop
            delta = int(time.time() - float(ts))
            action("... last loop")
            highlight("{}".format(delta))
            action("s ago ...")
            # BUG FIX: these messages contain no "{}" placeholder, so the
            # trailing .format(delta) calls were misleading no-ops; removed.
            if delta > 300:
                error("Last loop more than 300s ago (stuck? restart?)")
                continue
            if delta > 180:
                error("Last loop more than 180s ago (stuck? check logs/watch)")
                continue
            action("...")
            ok()
        except Exception as e:
            error(e)
def request(config: dict, method, path: str, headers=None, exit_on_error=True, **kwargs):
    '''Perform an authenticated call against the deploy API.

    *method* is a requests function (get/post/...); on HTTP errors the
    process exits unless exit_on_error is False.
    '''
    token = zign.api.get_token('uid', ['uid'])
    headers = headers or {}
    headers['Authorization'] = 'Bearer {}'.format(token)
    if config.get('user'):
        headers['X-On-Behalf-Of'] = config['user']
    url = urllib.parse.urljoin(config.get('deploy_api'), path)
    response = method(url, headers=headers, timeout=DEFAULT_HTTP_TIMEOUT, **kwargs)
    if exit_on_error and not (200 <= response.status_code < 400):
        error('Server returned HTTP error {} for {}:\n{}'.format(response.status_code, url, response.text))
        exit(2)
    return response
def test_echo():
    '''Exercise the clickclick echo helpers end to end.'''
    action('Action..')
    ok()
    action('Action..')
    error(' some error')
    action('Action..')
    # fatal_error must terminate the process
    with pytest.raises(SystemExit):
        fatal_error(' some fatal error')  # noqa
    action('Action..')
    warning(' some warning')
    info('Some info')
def status(config):
    """check system status"""
    redis, workers = check_redis_host(config['redis_host'], 6379)
    print("")
    workers = [w.decode() for w in sorted(workers)]

    def report(found, fail_msg):
        # shared reporting for the scheduler probes below
        if not found:
            error(fail_msg)
        else:
            action("... running {}".format(found[0][2:]))
            ok()

    action("Looking for <30s interval scheduler ...")
    report([w for w in workers if w[:7] == 's-p3423'], "not found! check p3423")
    action("Looking for >30s interval scheduler ...")
    report([w for w in workers if w[:7] == 's-p3422'], "not found! check p3422")
    action("Looking for NG scheduler ...")
    report([w for w in workers if w == 's-p3421.monitor02'],
           "not found! check p3421 on monitor02")
    action("Looking for self monitoring scheduler ...")
    # NOTE(review): message mentions p3411/itr-monitor02 while the filter matches
    # s-p3421.itr-monitor01 — confirm which is intended
    report([w for w in workers if w == 's-p3421.itr-monitor01'],
           "not found! check p3411 on itr-monitor02")
    print("")
    # split into schedulers ("s-" prefix) and plain workers
    ss = [w for w in workers if w[:2] == "s-"]
    ws = [w for w in workers if w[:2] != "s-"]
    check_schedulers(redis, ss)
    print("")
    check_queues(redis)
    print("")
    check_workers(redis, ws)
def raise_for_status(response, elastigroup_id):
    """Check a SpotInst API response and pretty-print any HTTP error.

    The error is reported (not re-raised); an extra warning is printed when
    another deployment is already running for the elastigroup.
    """
    try:
        response.raise_for_status()
    except HTTPError:
        # Parse the body once (original called response.json() twice) and
        # guard against missing keys: the original crashed with TypeError on
        # `.get("errors")[0]` when "errors" was absent/None, and with
        # AttributeError on `status.get(...)` when "status" was absent.
        body = response.json().get("response", {})
        status = body.get("status") or {}
        details = (body.get("errors") or [{}])[0]
        error("HTTP Error: {}[{}]".format(status.get("message"), status.get("code")))
        error("{}[{}]".format(details.get("message"), details.get("code")))
        if details.get("code") == "DEPLOYMENT_ALREADY_IN_PROGRESS":
            warning(
                "An older deploy is still running, check on SpotInst console deployments tab for elastigroup: [{}]."
                .format(elastigroup_id))
def _request_deletion(config, path, execute):
    """Create a delete change request for *path*; approve+execute it when
    *execute* is True, otherwise print the change request id."""
    response = request(config, requests.delete, path)
    change_request_id = response.json()['id']
    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)


def delete_deployment(config, deployment, execute):
    '''Delete deployment by first scaling down to 0, deleting the deployment
       resource and any replicaset resources owned by the deployment.'''
    cluster_id = config.get('kubernetes_cluster')
    name = deployment['metadata']['name']
    namespace = deployment['metadata']['namespace']

    # scale deployment to 0 before deleting
    _scale_deployment(config, name, namespace, 0, execute)

    # wait for the deployment to be scaled down to 0
    timeout = DEFAULT_RESOURCE_DELETION_TIMEOUT
    maxtime = time.time() + timeout
    while get_replicas(name, namespace) > 0:
        if time.time() > maxtime:
            error('Timed out after {:d}s waiting for deployment to scale down'.format(timeout))
            return
        # pause between polls — the original busy-waited, hammering the API
        time.sleep(1)

    # collect replicasets owned by the deployment BEFORE deleting it
    replicasets = kubectl_get(namespace, 'replicasets')
    owned_rs = get_owned_replicasets(deployment, replicasets['items'])

    # delete deployment
    info('Deleting deployment {}..'.format(name))
    _request_deletion(config, '/kubernetes-clusters/{}/namespaces/{}/deployments/{}'.format(
        cluster_id, namespace, name), execute)

    # delete replicasets
    for rs in owned_rs:
        rs_name = rs['metadata']['name']
        info('Deleting replicaset {}..'.format(rs_name))
        _request_deletion(config, '/kubernetes-clusters/{}/namespaces/{}/replicasets/{}'.format(
            cluster_id, namespace, rs_name), execute)
def agent_error(e: requests.HTTPError, fatal=True):
    """ Prints an agent error and exits """
    try:
        data = e.response.json()
        details = data['detail']  # type: str
    except ValueError:
        # Bug fix: the original crashed when the error body was not JSON
        # (requests raises ValueError/JSONDecodeError from .json()).
        # Fall back to the raw body text in that case.
        data = None
        details = e.response.text or str(e.response)
    if details:
        # prefix every line of the agent's message
        lines = ('[AGENT] {}'.format(line) for line in details.splitlines())
        msg = '\n' + '\n'.join(lines)
    else:
        # JSON body with empty "detail": fall back to problem+json fields
        # (assumes "status" and "title" keys exist — same as the original)
        msg = "[AGENT] {status} {title}".format_map(data)
    if fatal:
        fatal_error(msg)
    else:
        error(msg)
def agent_error(e: requests.HTTPError, fatal=True):
    """ Prints an agent error and exits """
    response = e.response
    # prefer the structured "detail" field; fall back to the raw body when
    # the response is not JSON
    try:
        details = response.json()['detail']  # type: str
    except JSONDecodeError:
        details = response.text or str(response)
    prefixed = ['[AGENT] {}'.format(line) for line in details.splitlines()]
    msg = '\n' + '\n'.join(prefixed)
    # fatal errors terminate the process; plain errors just print
    (fatal_error if fatal else error)(msg)
def send_request_to_loggly(ctx, request: str):
    """Execute a Loggly API request with the configured credentials.

    :param ctx: click context whose ``obj.config`` holds ``loggly_user`` /
                ``loggly_password``.
    :param request: full request URL to send.
    :return: parsed JSON body on HTTP 200, else None.
    """
    app_config = ctx.obj.config
    if 'loggly_user' not in app_config:
        error(
            'No Loggly credentials configured. Please set them via `app configure`'
        )
        # Bug fix: the original fell through here and crashed with a
        # KeyError on 'loggly_password' (error() reports but does not exit).
        return None
    response = requests.get(request, auth=(app_config['loggly_user'],
                                           app_config['loggly_password']))
    if response.status_code == 200:
        return response.json()
    else:
        error('Request "{}" failed with status code {}'.format(
            request, response.status_code))
        return None
def _default_kms_keyid(config):
    """Derive the default KMS key alias from the configured cluster id;
    exits with an error if no cluster is configured."""
    try:
        cluster = config['kubernetes_cluster']
    except KeyError:
        error("'kubernetes-cluster' not set. "
              "Please configure zdeploy.")
        sys.exit(1)
    # cluster id looks like "<...>:<local-id>"; the alias uses the local part
    local_id = cluster.rsplit(':')[-1]
    return 'alias/{}-deployment-secret'.format(local_id)


def _encrypt_with_kms(kms_keyid, region, plain_text):
    """Encrypt *plain_text* with AWS KMS and print the deployment secret;
    exits with a friendly message on known AWS client errors."""
    try:
        kms = boto3.client("kms", region)
        encrypted = kms.encrypt(KeyId=kms_keyid, Plaintext=plain_text.encode())
        encrypted = base64.b64encode(encrypted['CiphertextBlob'])
        account_name = get_aws_account_name()
        print("deployment-secret:{account_name}:{encrypted}".format(
            account_name=account_name, encrypted=encrypted.decode()
        ))
    except boto_exceptions.ClientError as exception:
        error_dict = exception.response["Error"]
        error_code = error_dict["Code"]
        if error_code == "NotFoundException":
            message = ("KMS key '{}' not found. "
                       "Please check your AWS region.".format(kms_keyid))
        elif error_code == "ExpiredTokenException":
            message = "Not logged in to AWS"
        else:
            message = "Failed to encrypt with KMS"
        error(message)
        sys.exit(1)


def encrypt(config, use_kms, kms_keyid, region):
    '''Encrypt plain text (read from stdin) for deployment configuration'''
    plain_text = sys.stdin.read()
    if use_kms:
        if not kms_keyid:
            kms_keyid = _default_kms_keyid(config)
        _encrypt_with_kms(kms_keyid, region, plain_text)
    else:
        # no KMS: delegate encryption to the deploy API's /secrets endpoint
        api_url = config.get('deploy_api')
        url = '{}/secrets'.format(api_url)
        response = request(config, requests.post, url,
                           json={'plaintext': plain_text})
        encrypted = response.json()['data']
        print("deployment-secret:autobahn-encrypted:{}".format(encrypted))
def run_linter(spec_file):
    """Validate a Swagger spec file and print a warning for every issue
    reported by the module-level ``lint_*`` rule functions."""
    spec = compatibility_layer(yaml.safe_load(spec_file))
    try:
        resolver = validate_spec(spec)
    except Exception as e:
        error('Error during Swagger schema validation:\n{}'.format(e))
        return
    # collect all "rules" defined as functions starting with "lint_"
    lint_rules = [func for name, func in globals().items()
                  if name.startswith('lint_')]
    for rule in lint_rules:
        for issue in rule(spec, resolver):
            # a rule yields either (location, message) or just a location
            if isinstance(issue, tuple):
                location, message = issue
            else:
                location, message = issue, None
            warning('{}: {}{}'.format(location,
                                      message + ' ' if message else '',
                                      rule.__doc__))
def validate_ssh_key(option_path: str, config_path: str, fallback_path: str,
                     interactive: bool) -> str:
    """Resolve which SSH public key path to use.

    An explicitly supplied *option_path* wins and suppresses the fallbacks;
    otherwise the config path and then the fallback path are tried.
    Returns "" when nothing valid was found but we may prompt interactively;
    in non-interactive mode a missing/invalid key is fatal.
    """
    if option_path:
        if check_ssh_key(option_path):
            return option_path
        # explicit key is invalid: fatal unless we can ask the user
        if not interactive:
            error(
                "specified ssh public key at {0:s} is not a valid key".format(
                    option_path))
            sys.exit(1)
    else:
        # no explicit key: try the configured path, then the fallback
        for candidate in (config_path, fallback_path):
            if check_ssh_key(candidate):
                return candidate
    if not interactive:
        error("No valid SSH public key could be determined. "
              "Please specify one with the -i flag. Consult help for details")
        sys.exit(1)
    return ""
def all_stacks_in_final_state(related_stacks_refs: list, region: str,
                              timeout: Optional[int], interval: int):
    """
    Wait until all related stacks are in a final (non-*_IN_PROGRESS) state,
    then yield so the wrapped code block can perform its changes.

    When *timeout* is ``None`` or < 1, no waiting happens and we yield
    immediately. If the timeout elapses while stacks are still in progress,
    the process exits with status 1 and the wrapped block never runs.

    :param related_stacks_refs: related stacks to wait for
    :param region: AWS region where the stacks live
    :param timeout: total seconds to wait; ``None`` (or < 1) disables waiting
    :param interval: seconds between checks against the AWS CF API
    """
    if timeout is None or timeout < 1:
        yield
    else:
        wait_timeout = datetime.datetime.utcnow() + datetime.timedelta(
            seconds=timeout)
        all_in_final_state = False
        while not all_in_final_state and wait_timeout > datetime.datetime.utcnow(
        ):
            # assume all stacks are ready
            all_in_final_state = True
            related_stacks = get_stacks(related_stacks_refs, region)
            if not related_stacks:
                error("Stack not found!")
                exit(1)
            for related_stack in related_stacks:
                current_stack_status = related_stack.StackStatus
                if current_stack_status.endswith("_IN_PROGRESS"):
                    # some operation in progress, let's wait some time to try again
                    all_in_final_state = False
                    info(
                        "Waiting for stack {} ({}) to perform requested operation.."
                        .format(related_stack.StackName, current_stack_status))
                    # NOTE(review): this sleeps once per in-progress stack, so
                    # one polling round can pause for interval * N seconds —
                    # presumably intended as a simple throttle; confirm.
                    time.sleep(interval)
        if datetime.datetime.utcnow() > wait_timeout:
            info("Timeout reached, requested operation not executed.")
            exit(1)
        else:
            yield
def main():
    """CLI entry point: rewrite /etc/hosts for a local CNAME mapping while
    holding a lock file to prevent concurrent edits."""
    parser = argparse.ArgumentParser()
    parser.add_argument('from')
    parser.add_argument('to')
    args = parser.parse_args()

    # the backup and lock files live next to /etc/hosts
    hosts_file = Path('/etc/hosts')
    backup_file = hosts_file.with_suffix('.local-cname-backup')
    lock_file = hosts_file.with_suffix('.local-cname-lock')

    file_lock = FileLock(str(lock_file))
    try:
        # give up quickly if another instance holds the lock
        with file_lock.acquire(timeout=1):
            edit_etc_hosts(hosts_file, backup_file, args)
    except Timeout:
        error('Another instance of local-cname seems to be running.')
        info('(if a previous process crashed, remove {} '
             'and check the contents of {} and {})'.format(
                 lock_file, hosts_file, backup_file))
        sys.exit(1)
def get_config_data(config_file=DEFAULT_CONFIG_FILE):
    """Load the CLI configuration from *config_file*, falling back to an
    interactive setup when the file does not exist yet."""
    path = os.path.expanduser(config_file)
    data = {}
    try:
        if not os.path.exists(path):
            # first run: warn and let the user create a configuration
            warning('No configuration file found at [{}]'.format(config_file))
            data = set_config_file()
            if not data:
                error('Failed to configure ZMON SLR cli.')
        else:
            with open(path) as fd:
                data = json.load(fd)
    except Exception as e:
        # any unexpected failure (bad JSON, IO error, ...) is fatal
        fatal_error(e)
    return data
def set_pierone_url(config: dict, url: str) -> str:
    '''Read Pier One URL from cli, from config file or from stdin.

    Prompts until a reachable URL is entered (when none was supplied),
    prepends https:// if the scheme is missing, validates the URL and
    stores it in *config*. Returns the final URL (the original annotation
    said ``-> None`` but the URL has always been returned).
    '''
    url = url or config.get('url')

    while not url:
        url = click.prompt('Please enter the Pier One URL', type=UrlType())

        try:
            # quick reachability probe before accepting the entered URL
            requests.get(url, timeout=5)
        except Exception:
            # Bug fix: the original used a bare `except:`, which also
            # swallowed KeyboardInterrupt/SystemExit, making the prompt
            # loop impossible to break with Ctrl-C.
            error('Could not reach {}'.format(url))
            url = None

    if '://' not in url:
        # issue 63: gracefully handle URLs without scheme
        url = 'https://{}'.format(url)

    validate_pierone_url(url)
    config['url'] = url
    return url