def _scale_down_scale_up(deployment='ckan', namespace=None, replicas=1, wait_seconds=20):
    """Bounce a deployment by scaling it to 0 replicas and back up.

    :param deployment: deployment name to scale (default: 'ckan')
    :param namespace: kubernetes namespace passed through to kubectl (None = default)
    :param replicas: replica count to scale back up to
    :param wait_seconds: seconds to sleep after scaling up, giving pods time to
                         start before callers continue (was a hard-coded 20)
    """
    logs.info('Scaling ckan replicas')
    kubectl.call(f'scale deployment {deployment} --replicas=0', namespace=namespace)
    kubectl.call(f'scale deployment {deployment} --replicas={replicas}', namespace=namespace)
    # crude settle delay -- callers rely on the pods having (re)started after this returns
    time.sleep(wait_seconds)
def zk_put_configs(configs_dir):
    """Upload every file under ``configs_dir`` into ZooKeeper under ``/configs``.

    Finds the zookeeper pod by label, then for each regular file creates the
    parent znodes one path segment at a time and writes the file content via
    ``zkCli.sh create`` (the file is first copied into the pod at /tmp/zk_input).

    :param configs_dir: local directory whose tree is mirrored into ZooKeeper
    """
    pod_name = kubectl.get('pods', '-l', 'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    for input_filename in glob.glob(f'{configs_dir}/**/*', recursive=True):
        if not os.path.isfile(input_filename):
            continue
        # Strip only the leading configs_dir prefix. The previous
        # str.replace(configs_dir, '') would also clobber any later occurrence
        # of the same path fragment inside the filename.
        output_filename = '/configs' + input_filename[len(configs_dir):]
        print(f'{input_filename} --> {output_filename}')
        output_filepath = ''
        for output_filepart in output_filename.split('/')[:-1]:
            output_filepart = output_filepart.strip()
            if not output_filepart:
                continue
            # build up parent znodes segment by segment; zkCli.sh create is
            # best-effort (already-existing znodes just report an error)
            output_filepath += f'/{output_filepart}'
            print(f'create {output_filepath} null')
            print(kubectl.call(
                f'exec {pod_name} zkCli.sh create {output_filepath} null'))
        print(f'copy {output_filename}')
        print(kubectl.call(f'cp {input_filename} {pod_name}:/tmp/zk_input'))
        print(f'create {output_filename}')
        print(kubectl.call(
            f"exec {pod_name} bash -- -c 'zkCli.sh create {output_filename} \"$(cat /tmp/zk_input)\"'"
        ))
def restart_solr_pods(show_zookeeper, solrcloud_only, force=False):
    """Delete solr-related pods in the ckan-cloud namespace so they get recreated.

    :param show_zookeeper: when truthy, delete only the zookeeper pods and return
    :param solrcloud_only: when truthy, delete only the solrcloud pods and return
    :param force: when truthy, force-delete with zero grace period
    """
    force = '--force --grace-period=0' if force else ''
    # mutually-exclusive targeted restarts, checked in priority order
    if show_zookeeper:
        kubectl.delete_items_by_labels(
            ['pod'], {'app': 'provider-solr-solrcloud-zk'}, 'ckan-cloud')
        return
    if solrcloud_only:
        kubectl.delete_items_by_labels(
            ['pod'], {'app': 'provider-solr-solrcloud-sc'}, 'ckan-cloud')
        return
    # fall-through: restart every pod in the namespace
    pod_name = '--all'
    kubectl.call(f'delete pods {pod_name} {force}', 'ckan-cloud')
def delete(router_name):
    """Delete all kubernetes resources belonging to a traefik router.

    Deletes every labeled resource kind plus the CkanCloudRouter object itself,
    then strips finalizers from any remaining CkanCloudRoute / CkanCloudRouter
    objects so kubernetes can garbage-collect them.

    :param router_name: value of the ckan-cloud/router-name label
    :raises Exception: if any deletion or finalizer-removal step fails
    """
    print(f'Deleting traefik router {router_name}')
    label = f'ckan-cloud/router-name={router_name}'
    # use a list (not a generator) so every delete runs even after a failure
    if not all([
        kubectl.call(f'delete --ignore-not-found -l {label} {kind}') == 0
        for kind in ('deployment', 'service', 'secret', 'configmap',
                     'PersistentVolumeClaim', 'CkanCloudRoute')
    ] + [
        kubectl.call(f'delete --ignore-not-found CkanCloudRouter {router_name}') == 0,
    ]):
        raise Exception('Deletion failed')
    print('Removing finalizers')
    success = True
    routes = kubectl.get_items_by_labels(
        'CkanCloudRoute', {'ckan-cloud/router-name': router_name},
        required=False) or []
    for route in routes:
        route_name = route['metadata']['name']
        if kubectl.call(
            f'patch CkanCloudRoute {route_name} -p \'{{"metadata":{{"finalizers":[]}}}}\' --type=merge',
        ) != 0:
            success = False
    if kubectl.get(f'CkanCloudRouter {router_name}', required=False):
        if kubectl.call(
            f'patch CkanCloudRouter {router_name} -p \'{{"metadata":{{"finalizers":[]}}}}\' --type=merge',
        ) != 0:
            success = False
    # raise (not assert): asserts are stripped under `python -O`, and the
    # deletion-failure path above already raises a plain Exception
    if not success:
        raise Exception('Failed to remove finalizers')
def delete(instance_id, instance):
    """Delete a CKAN instance's helm release and its namespace.

    Failures are collected rather than aborting immediately, so the namespace
    delete is still attempted even if the helm delete fails.

    :param instance_id: instance identifier (also the namespace name)
    :param instance: instance object (accepted for interface compatibility;
                     not read by this implementation)
    :raises AssertionError: with all collected error messages, if anything failed
    """
    tiller_namespace_name = _get_resource_name()
    ckan_helm_release_name = f'ckan-cloud-{instance_id}'
    errors = []
    try:
        logs.info(f'Deleting helm release {ckan_helm_release_name}')
        helm_driver.delete(tiller_namespace_name, ckan_helm_release_name)
    except Exception:
        # best-effort: log the traceback and continue on to the namespace delete
        logs.warning(traceback.format_exc())
        errors.append('Failed to delete helm release')
    if kubectl.call(f'delete --wait=false namespace {instance_id}') != 0:
        errors.append('Failed to delete namespace')
    # NOTE(review): assert is stripped under `python -O` -- consider raising instead
    assert len(errors) == 0, ', '.join(errors)
def delete(instance_id, instance):
    """Delete an app instance's helm release (with app-type hooks) and namespace.

    Runs the app-type manager's pre/post delete hooks around the helm delete
    when the instance spec declares an ``app-type``. Failures are collected so
    the namespace delete is still attempted after a helm failure.

    :param instance_id: instance identifier (also the namespace name)
    :param instance: instance object; ``spec['app-type']`` selects hook manager
    :raises AssertionError: with all collected error messages, if anything failed
    """
    tiller_namespace_name = _get_tiller_namespace_name(instance_id, instance)
    release_name = _get_helm_release_name(instance_id, instance)
    logs.info(tiller_namespace_name=tiller_namespace_name, release_name=release_name)
    errors = []
    try:
        logs.info(f'Deleting helm release {release_name}')
        delete_kwargs = dict(tiller_namespace=tiller_namespace_name,
                             release_name=release_name)
        app_type = instance['spec'].get('app-type')
        if app_type:
            _get_app_type_manager(app_type).pre_delete_hook(
                instance_id, instance, delete_kwargs)
        helm_driver.delete(**delete_kwargs)
        if app_type:
            _get_app_type_manager(app_type).post_delete_hook(
                instance_id, instance, delete_kwargs)
    except Exception:
        # best-effort: log the traceback and continue on to the namespace delete
        logs.warning(traceback.format_exc())
        errors.append('Failed to delete helm release')
    if kubectl.call(f'delete --wait=false namespace {instance_id}') != 0:
        errors.append('Failed to delete namespace')
    # NOTE(review): assert is stripped under `python -O` -- consider raising instead
    assert len(errors) == 0, ', '.join(errors)
def create(cls, *args, **kwargs):
    """Create a Deis-style CKAN instance object and apply it to the cluster.

    ``args[0]`` selects the creation mode:

    * ``'from-gitlab'``: args are (create_type, gitlab_repo, solr_config, ...,
      instance_id). Optionally imports DBs from backups and/or sets up a
      private gitlab registry pull secret.
    * ``'from-gcloud-envvars'``: args are (create_type, instance_env_yaml,
      image, solr_config, storage_path, instance_id). Requires a
      ``db_migration_name`` kwarg.

    In both modes a CkanCloudCkanInstance-style object is built and applied
    via ``kubectl apply``; returns ``cls(instance_id, values=instance)``.
    """
    create_type = args[0]
    instance_id = args[-1]
    from ckan_cloud_operator.providers.db.manager import get_default_db_prefix
    db_prefix = kwargs['db_prefix'] if kwargs.get('db_prefix') else get_default_db_prefix()
    if create_type == 'from-gitlab':
        gitlab_repo = args[1]
        solr_config = args[2]
        db_name = instance_id
        datastore_name = f'{instance_id}-datastore'
        storage_path = kwargs.get('storage_path') or f'/ckan/{instance_id}'
        from_db_backups = kwargs.get('from_db_backups')
        logs.info(f'Creating Deis CKAN instance {instance_id}',
                  gitlab_repo=gitlab_repo, solr_config=solr_config,
                  db_name=db_name, datastore_name=datastore_name,
                  storage_path=storage_path, from_db_backups=from_db_backups)
        if kwargs.get('use_private_gitlab_repo'):
            deploy_token_server = input('Gitlab registry url [default: registry.gitlab.com]: ') or 'registry.gitlab.com'
            # NOTE(review): the next three statements were redacted ('******')
            # in the source under review and are reconstructed from context --
            # confirm against VCS history.
            deploy_token_username = input('Gitlab deploy token username: ')
            deploy_token_password = input('Gitlab deploy token password: ')
            # recreate the pull secret idempotently: delete then create
            kubectl.call('delete secret private-gitlab-registry', namespace=instance_id)
            kubectl.call(
                f'create secret docker-registry private-gitlab-registry '
                f'--docker-server={deploy_token_server} '
                f'--docker-username={deploy_token_username} '
                f'--docker-password={deploy_token_password}',
                namespace=instance_id)
        if from_db_backups:
            db_import_url, datastore_import_url = from_db_backups.split(',')
            migration_name = None
            success = False
            for event in ckan_db_migration_manager.migrate_deis_dbs(
                    None, db_name, datastore_name,
                    db_import_url=db_import_url,
                    datastore_import_url=datastore_import_url,
                    rerun=kwargs.get('rerun'),
                    force=kwargs.get('force'),
                    recreate_dbs=kwargs.get('recreate_dbs'),
                    db_prefix=db_prefix):
                # keep the latest migration name seen across events
                migration_name = ckan_db_migration_manager.get_event_migration_created_name(event) or migration_name
                success = ckan_db_migration_manager.print_event_exit_on_complete(
                    event,
                    f'DBs import {from_db_backups} -> {db_name}, {datastore_name}',
                    soft_exit=True)
                # None means "not finished yet"; True/False is a final verdict
                if success is not None:
                    break
            assert success, f'Invalid DB migration success value ({success})'
        else:
            migration_name = None
        spec = {
            'ckanPodSpec': {},
            'ckanContainerSpec': {'imageFromGitlab': gitlab_repo},
            'envvars': {'fromGitlab': gitlab_repo},
            'solrCloudCollection': {
                'name': kwargs.get('solr_collection') or instance_id,
                'configName': solr_config
            },
            'db': {
                'name': db_name,
                **({'fromDbMigration': migration_name} if migration_name else {}),
                **({'dbPrefix': db_prefix} if db_prefix else {})
            },
            'datastore': {
                'name': datastore_name,
                **({'fromDbMigration': migration_name} if migration_name else {}),
                **({'dbPrefix': db_prefix} if db_prefix else {})
            },
            'storage': {
                'path': storage_path,
            }
        }
        if kwargs.get('use_private_gitlab_repo'):
            spec['ckanContainerSpec']['imagePullSecrets'] = [{'name': 'private-gitlab-registry'}]
    elif create_type == 'from-gcloud-envvars':
        print(f'Creating Deis CKAN instance {instance_id} from gcloud envvars import')
        instance_env_yaml, image, solr_config, storage_path, instance_id = args[1:]
        db_migration_name = kwargs.get('db_migration_name')
        assert db_migration_name, 'creating from gcloud envvars without a db migration is not supported yet'
        if type(instance_env_yaml) == str:
            # a string argument is a path to an envvars yaml file
            logs.info(f'Creating {instance_id}-envvars secret from file: {instance_env_yaml}')
            subprocess.check_call(
                f'kubectl -n ckan-cloud create secret generic {instance_id}-envvars --from-file=envvars.yaml={instance_env_yaml}',
                shell=True
            )
        else:
            logs.info(f'Creating {instance_id}-envvars secret from inline string')
            kubectl.update_secret(
                f'{instance_id}-envvars',
                {'envvars.yaml': yaml.dump(instance_env_yaml, default_flow_style=False)})
        spec = {
            'ckanPodSpec': {},
            'ckanContainerSpec': {'image': image},
            'envvars': {'fromSecret': f'{instance_id}-envvars'},
            'solrCloudCollection': {
                'name': instance_id,
                'configName': solr_config
            },
            'db': {
                'name': instance_id,
                'fromDbMigration': db_migration_name,
                **({'dbPrefix': db_prefix} if db_prefix else {})
            },
            'datastore': {
                'name': f'{instance_id}-datastore',
                'fromDbMigration': db_migration_name,
                **({'dbPrefix': db_prefix} if db_prefix else {})
            },
            'storage': {
                'path': storage_path
            }
        }
    else:
        raise NotImplementedError(f'invalid create type: {create_type}')
    instance_kind = ckan_manager.instance_kind()
    instance = {
        'apiVersion': 'stable.viderum.com/v1',
        'kind': instance_kind,
        'metadata': {
            'name': instance_id,
            'namespace': 'ckan-cloud',
            'finalizers': ['finalizer.stable.viderum.com']
        },
        'spec': spec
    }
    subprocess.run('kubectl apply -f -', input=yaml.dump(instance).encode(),
                   shell=True, check=True)
    return cls(instance_id, values=instance)
def _wait_instance_events(instance_id):
    """Poll until all expected instance events complete, or time out loudly.

    Polls ``_check_instance_events`` every 15s. The wait clock restarts
    whenever the set of missing events changes. After CCO_WAIT_TIMEOUT
    seconds (env var, default 500) without progress, dumps diagnostics for
    every non-ready pod in the instance namespace and raises.

    :param instance_id: instance identifier (also the pods' namespace)
    :raises Exception: on timeout, after logging container/pod diagnostics
    """
    start_time = datetime.datetime.now()
    last_message = 0
    logs.info('Waiting for instance events', start_time=start_time)
    missing_events = None
    while True:
        time.sleep(15)
        currently_missing, errors, ckan_logs = _check_instance_events(instance_id)
        if len(currently_missing) == 0:
            logs.info('All instance events completed successfully')
            break
        if currently_missing != missing_events:
            missing_events = currently_missing
            logs.info('Still waiting for', repr(sorted(missing_events)))
            # progress was made -- restart the timeout clock
            start_time = datetime.datetime.now()
        time_passed = (datetime.datetime.now() - start_time).total_seconds()
        if time_passed - last_message >= 60:
            logs.info('%d seconds since started waiting' % time_passed)
            last_message += 60
        if time_passed > int(os.environ.get('CCO_WAIT_TIMEOUT', 500)):
            # collect pods with any non-ready container; guard containerStatuses
            # which is absent on pods that have not been scheduled yet
            failed_pods = [
                item for item in kubectl.get(f'pods -n {instance_id}').get('items', [])
                if not all(stat.get('ready')
                           for stat in item['status'].get('containerStatuses', []))
            ]
            logs.info('*** SOMETHING WENT WRONG!!! ***')
            logs.info(100 * '#')
            logs.info(100 * '#')
            if not failed_pods:
                logs.info('But we could not get failing containers')
                logs.info(
                    'You may try increasing default wait timeout by setting CCO_WAIT_TIMEOUT environment variable [default: 500]'
                )
                # fall back to dumping the main ckan pod's logs
                ckan_pod_name = [
                    item['metadata']['name']
                    for item in kubectl.get(f'pods -n {instance_id}').get('items', [])
                    if item.get('metadata', {}).get('labels', {}).get('app') == 'ckan'
                ][0]
                _log_container_error('CONTAINER LOGS', ckan_pod_name, 'ckan')
                kubectl.call(f'logs {ckan_pod_name}', namespace=instance_id)
            logs.info('Number of Failed Pods: %s' % len(failed_pods))
            for pod_meta in failed_pods:
                init_containers = pod_meta['status'].get('initContainerStatuses')
                pod_name = pod_meta['metadata']['name']
                if init_containers is not None:
                    logs.info('Checking Init Containers in %s' % pod_name)
                    for i, init_container in enumerate(init_containers):
                        if not init_container.get('ready'):
                            container_name = pod_meta['spec']['initContainers'][i]['name']
                            _log_container_error('INIT CONTAINER LOGS', pod_name, container_name)
                            kubectl.call(f'logs {pod_name} -c {container_name}',
                                         namespace=instance_id)
                else:
                    logs.info('Init Containers are fine in %s' % pod_name)
                logs.info('Checking Containers in %s' % pod_name)
                container_stats = pod_meta['status'].get('containerStatuses')
                no_log_statuses = ['PodInitializing']
                pod_status = [
                    stat.get('state', {}).get('waiting', {}).get('reason') in no_log_statuses
                    for stat in container_stats
                ]
                if all(pod_status):
                    # all containers merely waiting on init -- describe the pod instead
                    logs.info('Pod %s looks good describing:' % pod_name)
                    _log_container_error('KUBECTL DESCRIBE POD', pod_name)
                    kubectl.call(f'describe pod {pod_name}', namespace=instance_id)
                else:
                    logs.info('Pod %s look did not start:' % pod_name)
                    for i, container in enumerate(container_stats):
                        container_name = pod_meta['spec']['containers'][i]['name']
                        _log_container_error('CONTAINER LOGS', pod_name, container_name)
                        # pass -c so the logs match the container announced above
                        # (the original omitted it and always dumped the default container)
                        kubectl.call(f'logs {pod_name} -c {container_name}',
                                     namespace=instance_id)
            logs.info(100 * '#')
            logs.info(100 * '#')
            raise Exception('timed out waiting for instance events')
def main(instance_id, old_db_prefix, new_db_prefix, down_time_approval_code,
         new_instance_id, gitlab_repo, solr_collection_name, storage_path):
    """Migrate a CKAN instance between DB prefixes via backup + copy job.

    Two-phase flow guarded by an approval code (this operation causes
    downtime): without a code, one is generated and printed for the operator;
    with a valid code, the instance deployment is deleted, DB backups are
    created, and parameter files are written to trigger the copy-instance and
    change-routes jobs.

    :raises NotImplementedError: if the instance's current prefix does not
                                 match ``old_db_prefix``
    """
    logs.info(instance_id=instance_id, old_db_prefix=old_db_prefix,
              new_db_prefix=new_db_prefix,
              down_time_approval_code=down_time_approval_code)
    instance = kubectl.get('ckancloudckaninstance', instance_id)
    current_db_prefix = instance['spec']['db'].get('dbPrefix', 'prod')
    current_datastore_prefix = instance['spec']['datastore'].get('dbPrefix', 'prod')
    assert current_db_prefix == current_datastore_prefix, 'different prefix for datastore and DB is not supported yet'
    db_name = instance['spec']['db']['name']
    datastore_name = instance['spec']['datastore']['name']
    logs.info(current_db_prefix=current_db_prefix, db_name=db_name,
              current_datastore_prefix=current_datastore_prefix,
              datastore_name=datastore_name)
    if current_db_prefix == old_db_prefix:
        if down_time_approval_code:
            _check_down_time_approval_code(instance_id, old_db_prefix, new_db_prefix,
                                           down_time_approval_code, db_name,
                                           datastore_name)
            # closing paren added to the log message (was unbalanced)
            logs.info(
                f'Deleting instance deployment (namespace={instance_id} deployment={instance_id})'
            )
            kubectl.call(f'delete deployment {instance_id} --wait=false',
                         namespace=instance_id)
            logs.info('Creating DB backups')
            assert gcloudsql_manager.create_backup(db_name), 'failed db backup'
            assert gcloudsql_manager.create_backup(datastore_name), 'failed datastore backup'
            db_backup_url, datastore_backup_url = _get_latest_backups(
                db_name, datastore_name)
            logs.important_log(logs.INFO, db_backup_url=db_backup_url)
            logs.important_log(logs.INFO, datastore_backup_url=datastore_backup_url)
            logs.info(
                'Creating parameters file to trigger copy instance job to create the new instance'
            )
            with open('copy_instance_params', 'w') as f:
                f.write(f'OLD_INSTANCE_ID={instance_id}\n'
                        f'NEW_INSTANCE_ID={new_instance_id}\n'
                        f'NEW_GITLAB_REPO={gitlab_repo}\n'
                        f'NEW_DB_PREFIX={new_db_prefix}\n'
                        f'IMPORT_DATE_PATH=\n'
                        f'IMPORT_HOUR=\n'
                        f'SKIP_MINIO_MIRROR=no\n'
                        f'SKIP_CREATE=no\n'
                        f'SKIP_ROUTER=no\n'
                        f'USE_EXISTING_MIGRATION=no\n'
                        f'NEW_SOLR_COLLECTION_NAME={solr_collection_name}\n'
                        f'NEW_STORAGE_PATH={storage_path}\n'
                        f'DATABASE_IMPORT_URL={db_backup_url}\n'
                        f'DATASTORE_IMPORT_URL={datastore_backup_url}\n'
                        f'DRY_RUN=yes\n')
            with open('change_instance_routes_params', 'w') as f:
                f.write(f'OLD_INSTANCE_ID={instance_id}\n'
                        f'NEW_INSTANCE_ID={new_instance_id}\n'
                        f'DRY_RUN=yes\n')
            logs.exit_great_success()
        else:
            # phase 1: emit an approval code the operator must pass back in
            down_time_approval_code = _create_down_time_approval_code(
                instance_id, old_db_prefix, new_db_prefix, db_name, datastore_name)
            logs.important_log(
                logs.INFO, f'DOWN_TIME_APPROVAL_CODE={down_time_approval_code}')
            logs.exit_great_success(quiet=True)
    else:
        raise NotImplementedError()
def set_image(instance_id, image_name, service='ckan', container_name=None):
    """Update a deployment container's image in the instance namespace.

    :param instance_id: kubernetes namespace of the instance
    :param image_name: full image reference to set
    :param service: deployment name (default 'ckan')
    :param container_name: container to update; defaults to the service name
    :return: kubectl exit code (0 on success) -- the original stored this in
             an unused local and returned None; returning it is backward-compatible
    """
    cont_name = container_name or service
    return kubectl.call(
        f'set image deployment/{service} {cont_name}={image_name}',
        namespace=instance_id)
def kubectl_command(arg):
    """Run an arbitrary kubectl command (argument list joined with spaces)
    and terminate the process with kubectl's exit status."""
    from ckan_cloud_operator import kubectl
    status = kubectl.call(' '.join(arg))
    exit(status)
def get_solr_pods(format=''):
    """List pods in the ckan-cloud namespace via kubectl.

    :param format: accepted for interface compatibility but currently unused --
                   TODO(review): wire it into the kubectl output (e.g. ``-o {format}``)
                   or drop it; also note it shadows the ``format`` builtin.
    """
    kubectl.call('get pods', 'ckan-cloud')
def _scale_down_scale_up(deployment='router-traefik-instances-default', replicas=1):
    """Bounce a deployment: scale to zero, then back to the requested replica count."""
    for target_count in (0, replicas):
        kubectl.call(f'scale deployment {deployment} --replicas={target_count}')