def get(instance_id, instance, res):
    """Collect pod and deployment status of an instance namespace into ``res``.

    ``res`` is updated in place:

    * ``res['app']`` holds the detailed status of every pod and deployment in
      the ``instance_id`` namespace, grouped by the value of their ``app``
      label (``'unknown'`` when the label is missing or empty).
    * ``res['ready']`` is ``False`` when any pod or deployment reports errors
      or when no pod labelled ``app=jenkins`` exists, ``True`` otherwise.

    Args:
        instance_id: Namespace to inspect.
        instance: Not used by this function; kept for interface compatibility.
        res: Dict that receives the ``ready`` and ``app`` keys.
    """
    res['ready'] = True

    def collect(what):
        """Return ``{app: {item_name: detailed_status}}`` for ``what``."""
        grouped = {}
        for item in kubectl.get(what, namespace=instance_id, required=True)['items']:
            metadata = item['metadata']
            # Objects without any label have no `labels` key at all, so the
            # lookup must not assume it exists.
            app = (metadata.get('labels') or {}).get('app') or 'unknown'
            item_status = kubectl.get_item_detailed_status(item)
            if item_status.get('errors'):
                res['ready'] = False
            grouped.setdefault(app, {})[metadata['name']] = item_status
        return grouped

    app_pods_status = collect('pods')
    app_deployments_status = collect('deployments')
    if 'jenkins' not in app_pods_status:
        res['ready'] = False
    res['app'] = {
        'pods': app_pods_status,
        'deployments': app_deployments_status
    }
def update(router_name, wait_ready, spec, annotations, routes, dry_run=False):
    """Apply the traefik router deployment and optionally wait for it.

    The current deployment generation is read first. Unless ``dry_run`` is
    set, the update runs through ``annotations.update_status`` and, when a
    previous deployment existed, the function polls until the deployment
    generation changes.

    Args:
        router_name: Router name, used in ``router-traefik-{router_name}``.
        wait_ready: When true, block until ``get(router_name)['ready']``.
        spec, annotations, routes: Forwarded to ``_update``.
        dry_run: When true, only print the generation info and change nothing.

    Raises:
        Exception: If the new generation is not exactly the old one plus 1.
    """
    deployment_ref = f'deployment router-traefik-{router_name}'
    old_deployment = kubectl.get(deployment_ref, required=False)
    old_generation = old_deployment.get('metadata', {}).get('generation') if old_deployment else None
    expected_new_generation = old_generation + 1 if old_generation else None
    if expected_new_generation:
        print(f'old deployment generation: {old_generation}')
    else:
        print('Creating new deployment')
    if dry_run:
        return
    annotations.update_status(
        'router', 'created',
        lambda: _update(router_name, spec, annotations, routes),
        force_update=True
    )
    if expected_new_generation:
        # Poll until the deployment generation moves past the old one.
        while True:
            time.sleep(.2)
            new_deployment = kubectl.get(deployment_ref, required=False)
            if not new_deployment:
                continue
            new_generation = new_deployment.get('metadata', {}).get('generation')
            if not new_generation or new_generation == old_generation:
                continue
            if new_generation != expected_new_generation:
                raise Exception(f'Invalid generation: {new_generation} (expected: {expected_new_generation})')
            print(f'new deployment generation: {new_generation}')
            break
    if wait_ready:
        print('Waiting for instance to be ready...')
        # `time.sleep` returns None, so it cannot be the loop condition:
        # `while time.sleep(2)` would exit without ever checking readiness.
        while True:
            time.sleep(2)
            if get(router_name)['ready']:
                break
            print('.')
def get_deis_instance_credentials(instance_id, is_datastore=False, is_datastore_readonly=False,
                                  required=True, with_db_prefix=False):
    """Return the database credentials of a Deis CKAN instance.

    The db name comes from the instance spec (``db`` or ``datastore``) and
    the password from the decoded ``{instance_id}-annotations`` secret.

    Args:
        instance_id: Instance name; also the namespace of the secret.
        is_datastore: Return the datastore credentials instead of the db ones.
        is_datastore_readonly: Return the read-only datastore credentials.
        required: Forwarded to ``kubectl.get``; also makes incomplete
            credentials an assertion failure.
        with_db_prefix: Append the db prefix as a fourth element.

    Returns:
        ``[user, password, db_name]`` (plus the db prefix when requested), or
        a tuple of ``None`` values of the same length when the instance, the
        secret or any of the values is missing.
    """
    not_found = (None,) * (4 if with_db_prefix else 3)

    instance = kubectl.get(f'{ckan_manager.instance_kind()} {instance_id}', required=required)
    if not instance:
        return not_found
    raw_secret = kubectl.get(f'secret {instance_id}-annotations', namespace=instance_id, required=required)
    if not raw_secret:
        return not_found
    secret = kubectl.decode_secret(raw_secret)

    wants_datastore = is_datastore or is_datastore_readonly
    spec_section = 'datastore' if wants_datastore else 'db'
    db_name = instance['spec'].get(spec_section, {}).get('name')
    if is_datastore_readonly:
        user = secret.get('datastoreReadonlyUser')
        password = secret.get('datatastoreReadonlyPassword')
    elif is_datastore:
        user = db_name
        password = secret.get('datastorePassword')
    else:
        user = db_name
        password = secret.get('databasePassword')

    credentials = [user, password, db_name]
    if not all(credentials):
        assert not required, 'missing some db values'
        return not_found
    if with_db_prefix:
        credentials.append(
            get_deis_instance_db_prefix_from_instance(instance, wants_datastore))
    return credentials
def update(self, wait_ready=False, skip_solr=False, skip_deployment=False):
    """Ensure the instance is updated to latest spec

    Updates the namespace, databases, solr (unless ``skip_solr``), storage,
    registry and envvars, then the deployment (unless ``skip_deployment``),
    polling until the deployment generation changes. With ``wait_ready`` it
    keeps printing the not-ready parts of ``self.get()`` until the instance
    reports ready. Finally updates ckan, tries to set the datastore read-only
    permissions (failures are only logged) and updates uptime monitoring.

    Raises:
        Exception: If the new deployment generation is not the expected one.
    """
    deployment_ref = f'deployment {self.id}'
    previous = kubectl.get(deployment_ref, required=False, namespace=self.id)
    previous_generation = previous.get('metadata', {}).get('generation') if previous else None
    expected_generation = previous_generation + 1 if previous_generation else 1
    print(f'old deployment generation = {previous_generation}')

    DeisCkanInstanceNamespace(self).update()
    for db_type in ('db', 'datastore'):
        DeisCkanInstanceDb(self, db_type).update()
    if not skip_solr:
        DeisCkanInstanceSolr(self).update()
    DeisCkanInstanceStorage(self).update()
    DeisCkanInstanceRegistry(self).update()
    envvars = DeisCkanInstanceEnvvars(self)
    envvars.update()

    if not skip_deployment:
        DeisCkanInstanceDeployment(self).update()
        # Poll until a generation different from the previous one shows up.
        current_generation = None
        while not current_generation or current_generation == previous_generation:
            time.sleep(.2)
            current = kubectl.get(deployment_ref, required=False, namespace=self.id)
            current_generation = current.get('metadata', {}).get('generation') if current else None
        if current_generation != expected_generation:
            raise Exception(f'Invalid generation: {current_generation} '
                            f'(expected: {expected_generation}')
        print(f'new deployment generation: {current_generation}')

    if wait_ready:
        print('Waiting for ready status')
        time.sleep(3)
        data = self.get()
        while not data.get('ready'):
            # Show only the namespace and the dict-valued parts not ready yet.
            pending = {
                key: value for key, value in data.items()
                if key == 'namespace'
                or (key != 'ready' and type(value) is dict and not value.get('ready'))
            }
            print(yaml.dump(pending, default_flow_style=False))
            time.sleep(2)
            data = self.get()
        print(yaml.dump(data, default_flow_style=False))

    self.ckan.update()
    try:
        DeisCkanInstanceDb(self, 'datastore').set_datastore_readonly_permissions()
    except Exception:
        logs.warning('Setting datastore permissions failed, continuing anyway')
    # Create/Update uptime monitoring after everything else is ready
    DeisCkanInstanceUptime(self).update(envvars.site_url)
def list_configs(namespace=None, full=False, show_secrets=False):
    """Yield the operator configs labelled for ``namespace``.

    Args:
        namespace: Config namespace; defaults to the operator namespace.
        full: Also fetch each config's values into a ``values`` key
            (``None`` when the item has no name).
        show_secrets: Include secrets in addition to configmaps.

    Yields:
        Dicts with ``kind`` and ``name`` (and ``values`` when ``full``).
    """
    label_prefix = labels_manager.get_label_prefix()
    namespace = namespace or cluster_manager.get_operator_namespace_name()
    kinds = 'configmaps,secrets' if show_secrets else 'configmaps'
    selector = f'{label_prefix}/operator-config-namespace={namespace}'
    configs = kubectl.get(kinds, '-l', selector, required=False)
    if not configs:
        return
    for config in configs.get('items', []):
        kind = config['kind']
        name = config.get('metadata', {}).get('name')
        entry = {'kind': kind, 'name': name}
        if full:
            entry['values'] = get(
                secret_name=name if kind == 'Secret' else None,
                configmap_name=name if kind == 'ConfigMap' else None,
                namespace=namespace,
                required=False,
            ) if name else None
        yield entry
def set_secrets(self, key_values):
    """Store ``key_values`` in the instance's ``-annotations`` secret.

    The values are base64-encoded and merged over the data of the existing
    secret and of the cached ``self._secret``. The resulting secret is
    applied with ``kubectl apply`` and cached in ``self._secret``.

    Args:
        key_values: Mapping of secret key to plain string value; every key
            must be listed in ``SECRET_ANNOTATIONS``.
    """
    for key in key_values:
        assert key in SECRET_ANNOTATIONS, f'invalid secret key: {key}'

    instance_id = self.instance.id
    secret_name = f'{instance_id}-annotations'

    cached = getattr(self, '_secret', None)
    cached_data = cached.get('data', {}) if cached and cached != __NONE__ else {}

    existing = kubectl.get(f'secret {secret_name}', namespace=instance_id, required=False) or {'data': {}}
    data = existing['data']
    data.update(**cached_data)
    for key, value in key_values.items():
        data[key] = base64.b64encode(value.encode()).decode()

    manifest = {
        'apiVersion': 'v1',
        'kind': 'Secret',
        'metadata': {
            'name': secret_name,
            'namespace': instance_id
        },
        'type': 'Opaque',
        'data': data
    }
    subprocess.run(f'kubectl -n {instance_id} apply -f -',
                   input=yaml.dump(manifest).encode(),
                   shell=True, check=True)
    self._secret = manifest
def get(what, *args, required=True, namespace=None, get_cmd=None, **kwargs):
    """Forward the call unchanged to ``kubectl.get`` and return its result."""
    options = dict(kwargs, required=required, namespace=namespace, get_cmd=get_cmd)
    return kubectl.get(what, *args, **options)
def zk_put_configs(configs_dir):
    """Upload every file under ``configs_dir`` to ZooKeeper below ``/configs``.

    For each file the parent znodes are created one level at a time, the file
    is copied into the first ``app=provider-solr-solrcloud-zk`` pod as
    ``/tmp/zk_input`` and a znode is created from its content. Every kubectl
    command is retried; a command that keeps failing is logged and skipped
    (best effort).

    Args:
        configs_dir: Local directory that is searched recursively.
    """
    def retry_if_fails(command, max_retries=15):
        """Run ``command``, retrying every 5 seconds up to ``max_retries`` times."""
        for _ in range(max_retries + 1):
            try:
                kubectl.check_output(command)
            except Exception:
                # Not a bare `except:` so Ctrl-C / SystemExit still abort.
                time.sleep(5)
            else:
                return
        if max_retries >= 0:
            logs.warning(f'giving up after {max_retries + 1} failed attempts: {command}')

    pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    logs.info(f'using pod {pod_name}')
    for input_filename in glob.glob(f'{configs_dir}/**/*', recursive=True):
        if not os.path.isfile(input_filename):
            continue
        output_filename = '/configs' + input_filename.replace(configs_dir, '')
        logs.info(f'{input_filename} --> {output_filename}')
        # Create each parent znode in turn.
        output_filepath = ''
        for output_filepart in output_filename.split('/')[:-1]:
            output_filepart = output_filepart.strip()
            if not output_filepart:
                continue
            output_filepath += f'/{output_filepart}'
            logs.info(f'create {output_filepath} null')
            retry_if_fails(
                f'exec {pod_name} zkCli.sh create {output_filepath} null')
        logs.info(f'copy {output_filename}')
        retry_if_fails(f'cp {input_filename} {pod_name}:/tmp/zk_input')
        logs.info(f'create {output_filename}')
        retry_if_fails(
            f"exec {pod_name} -- /bin/bash -c '/usr/bin/zkCli.sh create {output_filename} \"$(cat /tmp/zk_input)\"'"
        )
def get(router_name_or_values, required=False, only_dns=False, failfast=False):
    """Return the details of a router, or ``None`` when it does not exist.

    Args:
        router_name_or_values: Router name, or an already fetched
            ``CkanCloudRouter`` object (its name is read from its metadata).
        required: Forwarded to ``kubectl.get`` and ``_init_router``.
        only_dns: Return only the ``name`` and ``dns`` keys and skip the
            deployment and routes lookups.
        failfast: Forwarded to the router-type manager's dns lookup.
            It was previously ignored and always sent as ``True``.

    Returns:
        A dict describing the router, or ``None`` if ``_init_router``
        returned no router.
    """
    if isinstance(router_name_or_values, str):
        router_name = router_name_or_values
        router_values = kubectl.get(f'CkanCloudRouter {router_name}', required=required)
    else:
        router_name = router_name_or_values['metadata']['name']
        router_values = router_name_or_values
    router, _spec, router_type, _annotations, _labels, router_type_config = _init_router(
        router_name, router_values, required=required)
    if not router:
        return None

    manager = router_type_config['manager']
    dns_data = manager.get(router_name, 'dns', router, failfast=failfast)
    if only_dns:
        return {'name': router_name, 'dns': dns_data}

    deployment_data = manager.get(router_name, 'deployment')
    routes = routes_manager.list(_get_labels(router_name, router_type))
    return {'name': router_name,
            'annotations': router_values['metadata']['annotations'],
            'routes': [route.get('spec') for route in routes] if routes else [],
            'type': router_type,
            'deployment': deployment_data,
            'ready': deployment_data.get('ready', False),
            'dns': dns_data,
            'spec': {'ready': True, **router_values['spec']}}
def get_kube_version_info():
    """Return the kubectl client and Kubernetes server major/minor versions.

    Returns:
        Dict with ``clientMajor``, ``clientMinor``, ``serverMajor`` and
        ``serverMinor`` exactly as reported by ``kubectl version``.

    Raises:
        Exception: If a major version cannot be converted to an integer.
        AssertionError: If the client is older than 1.11 or the server is
            older than 1.10.
    """
    raw = kubectl.get('', get_cmd='version')
    version = {
        f'{side}{part.capitalize()}': raw[f'{side}Version'][part]
        for side in ('client', 'server')
        for part in ('major', 'minor')
    }

    def parse_major(key, description):
        try:
            return int(version[key])
        except Exception:
            raise Exception(f'Failed to get {description} major version ({key}={version[key]}')

    def parse_minor(key):
        # A minor version may carry a trailing "+" (e.g. "14+").
        minor = version[key]
        return int(minor[:-1]) if minor.endswith('+') else int(minor)

    client_major = parse_major('clientMajor', 'kubectl client')
    client_minor = parse_minor('clientMinor')
    assert client_major == 1 and client_minor >= 11, (
        'Invalid kubectl client version, '
        'minimal supported version: 1.11\n'
        'If you are using GKE, run: gcloud components update'
    )

    server_major = parse_major('serverMajor', 'Kubernetes server')
    server_minor = parse_minor('serverMinor')
    assert server_major == 1 and server_minor >= 10, (
        "Invalid Kubernetes server version, "
        "minimal supported version: 1.10"
    )
    return version
def _init_namespace(instance_id, dry_run=False):
    """Create the instance namespace and its operator RBAC objects if missing.

    When the namespace already exists only an info message is logged.
    Otherwise the namespace is applied (honouring ``dry_run``) and, unless
    ``dry_run`` is set, a service account, a role and a role binding for the
    instance operator are created in it.

    Args:
        instance_id: Name of the namespace / instance.
        dry_run: When true, no service account, role or binding is updated.
    """
    logs.debug('Initializing helm-based instance deployment namespace', namespace=instance_id)
    if kubectl.get('ns', instance_id, required=False):
        logs.info(f'instance namespace already exists ({instance_id})')
        return

    # NOTE(review): the RBAC setup below only runs for a newly created
    # namespace — confirm that is intended.
    logs.info(f'creating instance namespace ({instance_id})')
    namespace_resource = kubectl.get_resource('v1', 'Namespace', instance_id, {})
    kubectl.apply(namespace_resource, dry_run=dry_run)

    service_account_name = f'ckan-{instance_id}-operator'
    logs.debug('Creating service account', service_account_name=service_account_name)
    if not dry_run:
        kubectl_rbac_driver.update_service_account(
            service_account_name, {}, namespace=instance_id)

    role_name = f'ckan-{instance_id}-operator-role'
    logs.debug('Creating role and binding to the service account', role_name=role_name)
    if not dry_run:
        rules = [{
            "apiGroups": ["*"],
            "resources": ['secrets', 'pods', 'pods/exec', 'pods/portforward'],
            "verbs": ["list", "get", "create"]
        }]
        kubectl_rbac_driver.update_role(role_name, {}, rules, namespace=instance_id)
        kubectl_rbac_driver.update_role_binding(
            name=f'ckan-{instance_id}-operator-rolebinding',
            role_name=role_name,
            namespace=instance_id,
            service_account_name=service_account_name,
            labels={})
def zk_list_configs():
    """Return the names of the configs stored below ``/configs`` in ZooKeeper.

    Runs ``zkCli.sh ls /configs`` in the first
    ``app=provider-solr-solrcloud-zk`` pod. The first five output lines are
    skipped (presumably zkCli connection output — TODO confirm) and the
    remaining ``[a, b, c]`` line is parsed.

    Returns:
        List of config names; empty when the listing is empty or the output
        does not consist of exactly one listing line.
    """
    pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    output = kubectl.check_output(f'exec {pod_name} zkCli.sh ls /configs').decode()
    lines = output.splitlines()[5:]
    if len(lines) != 1:
        return []
    names = (name.strip() for name in lines[0][1:-1].split(','))
    # An empty listing ("[]") splits into a single empty string; drop it so
    # callers get [] rather than [''].
    return [name for name in names if name]
def get_load_balancer_ip(router_name, failfast=False):
    """Wait for the router's load balancer address and return it.

    Polls the ``loadbalancer-{resource_name}`` service up to 10 times, one
    minute apart, until its single ingress entry exposes a hostname (when
    the provider id is ``aws``) or an ip (any other provider).

    Args:
        router_name: Router whose load balancer service is inspected.
        failfast: Return ``None`` immediately when the service does not exist
            instead of waiting for it.

    Returns:
        The hostname or ip, or ``None`` (only with ``failfast``).

    Raises:
        AssertionError: If the service reports more than one ingress, or if
            no address became available after all retries.
    """
    resource_name = _get_resource_name(router_name)
    RETRIES = 10
    for attempt in range(RETRIES):
        load_balancer = kubectl.get(f'service loadbalancer-{resource_name}', required=False)
        if not load_balancer:
            if failfast:
                return None
        else:
            ingresses = load_balancer.get('status', {}).get('loadBalancer', {}).get('ingress', [])
            if ingresses:
                assert len(ingresses) == 1
                field = 'hostname' if cluster_manager.get_provider_id() == 'aws' else 'ip'
                address = ingresses[0].get(field)
                if address:
                    return address
                logs.warning('Failed to get %s, retrying %r' % (field, ingresses[0]))
        # Every "not available yet" outcome waits before the next attempt;
        # retrying a missing service or empty ingress list without a delay
        # used up all attempts at once. No sleep after the final attempt.
        if attempt < RETRIES - 1:
            time.sleep(60)
    raise AssertionError("Gave up on waiting for load balancer IP")
def delete_bucket(instance_id, dry_run=False):
    """Empty and remove the S3 bucket attached to a CKAN instance.

    Nothing is done (a warning is logged) when no bucket name starts with
    ``{instance_id}-cc`` or when the instance spec has no bucket for
    ``PROVIDER_ID``.

    Args:
        instance_id: Name of the ``ckancloudckaninstance`` object.
        dry_run: Only run ``s3 rm`` with ``--dryrun``; the bucket is kept.
    """
    prefix = f'{instance_id}-cc'
    if not any(name.startswith(prefix) for name in list_s3_buckets(names_only=True)):
        logs.warning(
            f'No bucket found for the instance "{instance_id}". Skipping.')
        return

    instance = kubectl.get(f'ckancloudckaninstance {instance_id}')
    # `ckanStorageBucket` may be absent (or null) in the spec; treat that the
    # same as "no bucket attached" instead of failing on None.get().
    bucket = (instance['spec'].get('ckanStorageBucket') or {}).get(PROVIDER_ID)
    bucket_name = bucket.get('BUCKET_NAME') if bucket else None
    if not bucket_name:
        logs.warning('This instance does not have S3 bucket attached.')
        return

    cmd = f's3 rm {bucket_name} --recursive'
    if dry_run:
        cmd += ' --dryrun'
    # Two steps deletion. See the `aws s3 rb help`
    aws_check_output(cmd)
    if not dry_run:
        aws_check_output(f's3 rb {bucket_name}')
def delete(router_name):
    """Delete a traefik router and every object labelled with its name.

    All delete commands are always issued. If every one of them succeeded,
    the finalizers of the router's ``CkanCloudRoute`` objects and of the
    ``CkanCloudRouter`` itself (when it still exists) are removed.

    Raises:
        Exception: If any delete command returned a non-zero exit code.
        AssertionError: If removing any finalizer failed.
    """
    print(f'Deleting traefik router {router_name}')
    labelled_kinds = ('deployment', 'service', 'secret', 'configmap',
                      'PersistentVolumeClaim', 'CkanCloudRoute')
    delete_commands = [
        f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} {kind}'
        for kind in labelled_kinds
    ]
    delete_commands.append(f'delete --ignore-not-found CkanCloudRouter {router_name}')
    # Run every command before looking at the results (no short-circuit).
    exit_codes = [kubectl.call(command) for command in delete_commands]
    if any(code != 0 for code in exit_codes):
        raise Exception('Deletion failed')

    print('Removing finalizers')
    patch_args = '-p \'{"metadata":{"finalizers":[]}}\' --type=merge'
    success = True
    routes = kubectl.get_items_by_labels('CkanCloudRoute', {'ckan-cloud/router-name': router_name},
                                         required=False)
    for route in routes or []:
        route_name = route['metadata']['name']
        if kubectl.call(f'patch CkanCloudRoute {route_name} {patch_args}') != 0:
            success = False
    if kubectl.get(f'CkanCloudRouter {router_name}', required=False):
        if kubectl.call(f'patch CkanCloudRouter {router_name} {patch_args}') != 0:
            success = False
    assert success
def zk_put_configs(configs_dir):
    """Upload every file under ``configs_dir`` to ZooKeeper below ``/configs``.

    For each file the parent znodes are created one level at a time, the file
    is copied into the first ``app=provider-solr-solrcloud-zk`` pod as
    ``/tmp/zk_input`` and a znode is created from its content. The exit code
    of every kubectl call is printed; failures do not stop the upload.

    Args:
        configs_dir: Local directory that is searched recursively.
    """
    pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    for input_filename in glob.glob(f'{configs_dir}/**/*', recursive=True):
        if not os.path.isfile(input_filename):
            continue
        output_filename = '/configs' + input_filename.replace(configs_dir, '')
        print(f'{input_filename} --> {output_filename}')
        # Create each parent znode in turn.
        output_filepath = ''
        for output_filepart in output_filename.split('/')[:-1]:
            output_filepart = output_filepart.strip()
            if not output_filepart:
                continue
            output_filepath += f'/{output_filepart}'
            print(f'create {output_filepath} null')
            print(
                kubectl.call(
                    f'exec {pod_name} zkCli.sh create {output_filepath} null'))
        print(f'copy {output_filename}')
        print(kubectl.call(f'cp {input_filename} {pod_name}:/tmp/zk_input'))
        print(f'create {output_filename}')
        # The `--` separator must come before the program: kubectl treats
        # what follows `--` as the command, so `bash -- -c ...` would try to
        # execute `-c` instead of bash.
        print(
            kubectl.call(
                f"exec {pod_name} -- bash -c 'zkCli.sh create {output_filename} \"$(cat /tmp/zk_input)\"'"
            ))
def list(cls, full=False, quick=False, return_list=False):
    """List the CKAN instances, printing each as YAML or returning them.

    Args:
        full: Include the complete data (the raw item in ``quick`` mode, the
            full ``instance.get()`` result otherwise) instead of a summary.
        quick: Skip the per-instance status lookup; ``ready`` is ``None``.
        return_list: Return the entries instead of printing them.

    Returns:
        The list of entries when ``return_list`` is true, otherwise ``None``.
    """
    entries = []
    listing = kubectl.get(ckan_manager.instance_kind(), required=False) or {'items': []}
    for item in listing['items']:
        instance_name = item['metadata']['name']
        if quick:
            entry = {'id': instance_name, 'ready': None}
            if full:
                entry['item'] = item
        else:
            try:
                instance = DeisCkanInstance(instance_name, values=item)
                entry = instance.get()
                if not full:
                    entry = {'id': instance.id, 'ready': entry['ready']}
            except Exception:
                # A failing instance is reported, not fatal for the listing.
                entry = {'id': instance_name, 'ready': False,
                         'error': traceback.format_exc()}
        if return_list:
            entries.append(entry)
        else:
            print(yaml.dump([entry], default_flow_style=False))
    if return_list:
        return entries
def get_instance_image(instance_id):
    """Return the image configured for the given CKAN instance.

    The value is ``ckanContainerSpec.image`` of the ``ckancloudckaninstance``
    named ``instance_id``, falling back to ``imageFromGitlab``.

    Returns:
        The image, or ``None`` when there is no such instance or no image.
    """
    images = {}
    for item in kubectl.get('ckancloudckaninstance')['items']:
        name = item['metadata']['name']
        container_spec = item['spec'].get('ckanContainerSpec', {})
        images[name] = container_spec.get('image', container_spec.get('imageFromGitlab'))
    return images.get(instance_id)
def zk_set_url_scheme(scheme='http'):
    """Set ``urlScheme`` in ZooKeeper's ``/clusterprops.json``.

    The command runs through ``zkCli.sh`` in the first
    ``app=provider-solr-solrcloud-zk`` pod.

    Args:
        scheme: Value written to ``urlScheme``.
    """
    zk_pods = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk', required=True)
    pod_name = zk_pods['items'][0]['metadata']['name']
    command = f'exec {pod_name} zkCli.sh set /clusterprops.json \'{{"urlScheme":"{scheme}"}}\''
    kubectl.check_output(command)
def get(router_name):
    """Return the detailed status of the router's traefik deployment.

    Returns:
        The result of ``kubectl.get_deployment_detailed_status``, or
        ``{'ready': False}`` when the deployment does not exist.
    """
    deployment = kubectl.get(f'deployment/router-traefik-{router_name}', required=False)
    if not deployment:
        return {'ready': False}
    pod_selector = f'ckan-cloud/router-name={router_name}'
    return kubectl.get_deployment_detailed_status(deployment, pod_selector, 'traefik')
def zk_get_config_file(config_name, config_file, output_filename):
    """Download one config file from ZooKeeper into a local file.

    Reads ``/configs/{config_name}{config_file}`` with ``zkCli.sh get`` in
    the first ``app=provider-solr-solrcloud-zk`` pod, skips the first five
    output lines (presumably zkCli connection output — TODO confirm) and
    writes the rest to ``output_filename``, creating its directory if needed.

    Args:
        config_name: Name of the config below ``/configs``.
        config_file: Path of the file inside the config; it is appended
            directly to the config name.
        output_filename: Local path to write.

    Raises:
        AssertionError: If zkCli returned no content lines.
    """
    path = f'/configs/{config_name}{config_file}'
    pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    output = kubectl.check_output(f'exec {pod_name} zkCli.sh get {path} 2>/dev/null').decode()
    lines = output.splitlines()[5:]
    assert len(lines) > 0
    output_dir = os.path.dirname(output_filename)
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_filename, 'w') as f:
        f.write('\n'.join(lines))
def zk_set_url_scheme(scheme='http', timeout=300):
    """Set ``urlScheme`` in ZooKeeper's ``/clusterprops.json``, with retries.

    The command runs through ``zkCli.sh`` in the first
    ``app=provider-solr-solrcloud-zk`` pod, which is looked up again before
    every attempt. A failed attempt is retried after 60 seconds until the
    timeout budget is used up.

    Args:
        scheme: Value written to ``urlScheme``.
        timeout: Retry budget in seconds; each retry consumes 60. The error
            of the failing attempt is re-raised once the budget is negative.
    """
    while True:
        pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                               required=True)['items'][0]['metadata']['name']
        try:
            kubectl.check_output(
                'exec %s zkCli.sh set /clusterprops.json \'{"urlScheme":"%s"}\'' % (pod_name, scheme))
        except Exception:
            # Give up right away once the budget is spent instead of
            # sleeping another minute before raising.
            if timeout < 0:
                raise
            print('Failed to connect ZooKeeper, retrying in 60 seconds')
            time.sleep(60)
            timeout -= 60
        else:
            return
def _get_instance_target_port(instance_id):
    """Return the instance's ``spec.routes.target-port``, defaulting to 5000.

    The default is also used when the ``ckancloudckaninstance`` object does
    not exist or the configured value is empty.
    """
    default_port = 5000
    instance = kubectl.get(f'ckancloudckaninstance {instance_id}', required=False)
    if not instance:
        return default_port
    configured_port = instance.get('spec', {}).get('routes', {}).get('target-port')
    return configured_port or default_port
def _get_secret(key, default=None):
    """Return one decoded value of the azuresql credentials secret.

    Args:
        key: Key inside the ``ckan-cloud-provider-db-azuresql-credentials``
            secret's data.
        default: Returned when the secret does not exist or the key is
            missing or empty.
    """
    secret = kubectl.get('secret ckan-cloud-provider-db-azuresql-credentials', required=False)
    if not secret:
        return default
    encoded = secret.get('data', {}).get(key, None)
    if not encoded:
        return default
    return base64.b64decode(encoded).decode()
def get(singular, *args, name=None, required=True, get_cmd='get', **kwargs):
    """Run kubectl.get for the given crd singular value and optional get args / kwargs

    When ``name`` is given, the resource name derived from it is passed as
    the first extra argument.
    """
    crd_prefix = get_crd_prefix()
    _, kind_suffix = _get_plural_kind_suffix(singular)
    get_args = (get_resource_name(singular, name), *args) if name else args
    return kubectl.get(
        f'{crd_prefix}{kind_suffix}',
        *get_args,
        required=required,
        get_cmd=get_cmd,
        **kwargs
    )
def update(name):
    """Apply the deployment of the ``CkanCloudDatapusher`` named ``name``.

    The registry secret is updated first; the deployment is then built from
    the datapusher's spec and labels and applied with ``kubectl.apply``.
    """
    _update_registry_secret()
    datapusher_spec = kubectl.get(f'CkanCloudDatapusher {name}')['spec']
    deployment_name = get_deployment_name(name)
    labels = _get_labels(name)
    deployment_spec = _get_deployment_spec(labels, datapusher_spec)
    print(
        f'Updating CkanCloudDatapusher {name} (deployment_name={deployment_name})'
    )
    kubectl.apply(kubectl.get_deployment(deployment_name, labels, deployment_spec))
def auto_get_availability_zone():
    """Return the availability zone that hosts the most cluster nodes.

    The zone is read from the ``failure-domain.beta.kubernetes.io/zone`` node
    label; nodes that lack it fall back to ``topology.kubernetes.io/zone``.
    When several zones have the same node count, the zone seen first wins.

    Raises:
        KeyError: If a node carries neither zone label.
        IndexError: If the cluster reports no nodes.
    """
    print('getting availability zone with most nodes in the cluster')
    zone_labels = (
        'failure-domain.beta.kubernetes.io/zone',  # label used so far, preferred
        'topology.kubernetes.io/zone',
    )
    nodes_per_zone = collections.Counter()
    for node in kubectl.get('nodes')['items']:
        labels = node['metadata']['labels']
        label = next((name for name in zone_labels if name in labels), zone_labels[0])
        nodes_per_zone[labels[label]] += 1
    return nodes_per_zone.most_common(1)[0][0]
def get_kube_version_info():
    """Return the kubectl client and Kubernetes server major/minor versions.

    Returns:
        Dict with ``clientMajor``, ``clientMinor``, ``serverMajor`` and
        ``serverMinor`` exactly as reported by ``kubectl version``.

    Raises:
        AssertionError: If the kubectl client is older than 1.11.
    """
    raw = kubectl.get('', get_cmd='version')
    version = {
        'clientMajor': raw['clientVersion']['major'],
        'clientMinor': raw['clientVersion']['minor'],
        'serverMajor': raw['serverVersion']['major'],
        'serverMinor': raw['serverVersion']['minor'],
    }
    client_major = int(version['clientMajor'])
    # A minor version may carry a trailing "+" (e.g. "14+"), which int()
    # cannot parse, so strip it before converting.
    client_minor = int(version['clientMinor'].rstrip('+'))
    assert client_major == 1 and client_minor >= 11, 'Invalid kubectl client version, ' \
                                                     'minimal supported version: 1.11\n' \
                                                     'If you are using GKE, run: gcloud components update'
    return version
def get(self):
    """Return the status of the instance's CKAN deployment and its pods.

    Returns:
        When the deployment cannot be fetched: ``{'ready': False, 'error':
        <kubectl output>}``. Otherwise the deployment's detailed status plus:

        * ``pods``: detailed status of each ``app=ckan`` pod, each with the
          last 5 log lines of its ``ckan`` container under ``logs``
          (``None`` if the logs could not be read);
        * ``image``: image of the first container of the last pod inspected;
        * ``latest_pod_name`` / ``latest_operator_timestamp``: the pod with
          the highest ``ckan-cloud/operator-timestamp`` annotation;
        * ``ready``: ``False`` when the deployment reports errors, there are
          no pods, the pods run different images, or the latest pod has
          errors or unreadable logs.
    """
    instance_id = self.instance.id
    exitcode, output = subprocess.getstatusoutput(
        f'kubectl -n {instance_id} get deployment/{instance_id} -o yaml'
    )
    if exitcode != 0:
        return {'ready': False, 'error': output}

    # safe_load: kubectl output is plain YAML, and yaml.load() without an
    # explicit Loader is rejected by current PyYAML releases.
    deployment = yaml.safe_load(output)
    status = kubectl.get_item_detailed_status(deployment)
    # Detailed statuses report problems under 'errors' (the key the pod
    # check below reads too); reading 'error' here never found any.
    ready = len(status.get('errors', [])) == 0
    status['pods'] = []
    pods = kubectl.get('pods -l app=ckan', namespace=instance_id, required=False)
    image = None
    latest_operator_timestamp, latest_pod_name, latest_pod_status = None, None, None
    if pods:
        for pod in pods['items']:
            pod_name = pod['metadata']['name']
            pod_operator_timestamp = pod['metadata']['annotations']['ckan-cloud/operator-timestamp']
            if not latest_operator_timestamp or latest_operator_timestamp < pod_operator_timestamp:
                latest_operator_timestamp = pod_operator_timestamp
                latest_pod_name = pod_name
            pod_status = kubectl.get_item_detailed_status(pod)
            status_code, logs_output = subprocess.getstatusoutput(
                f'kubectl -n {instance_id} logs {pod_name} -c ckan --tail 5',
            )
            pod_status['logs'] = logs_output if status_code == 0 else None
            pod_image = pod["spec"]["containers"][0]["image"]
            # Pods running different images mean the instance is not ready.
            if image and image != pod_image:
                ready = False
            image = pod_image
            status['pods'].append(pod_status)
            if latest_pod_name == pod_status['name']:
                latest_pod_status = pod_status
        if (not latest_pod_status
                or len(latest_pod_status.get('errors', [])) > 0
                or latest_pod_status['logs'] is None):
            ready = False
    else:
        ready = False
    return dict(status,
                ready=ready,
                image=image,
                latest_pod_name=latest_pod_name,
                latest_operator_timestamp=latest_operator_timestamp)
def list_buckets():
    """Returns list of buckets attached to CKAN Instances

    Each entry is a dict with the ``instance_id`` (``spec.id``) and the
    ``bucket`` stored under ``spec.bucket[PROVIDER_ID]``; instances without
    such a bucket are left out.
    """
    items = kubectl.get('ckancloudckaninstance').get('items', [])
    items_with_bucket = (
        (item, item['spec'].get('bucket', {}).get(PROVIDER_ID))
        for item in items
    )
    return [
        {'instance_id': item['spec']['id'], 'bucket': bucket}
        for item, bucket in items_with_bucket
        if bucket
    ]