def db_joined():
    if is_relation_made('pgsql-nova-db') or \
            is_relation_made('pgsql-neutron-db'):
        # error, postgresql is used
        e = ('Attempting to associate a mysql database when there is already '
             'associated a postgresql one')
        log(e, level=ERROR)
        raise Exception(e)

    if network_manager() in ['quantum', 'neutron']:
        config_neutron = True
    else:
        config_neutron = False

    if config('prefer-ipv6'):
        sync_db_with_multi_ipv6_addresses(config('database'),
                                          config('database-user'),
                                          relation_prefix='nova')

        if config_neutron:
            sync_db_with_multi_ipv6_addresses(config('neutron-database'),
                                              config('neutron-database-user'),
                                              relation_prefix='neutron')
    else:
        host = unit_get('private-address')
        relation_set(nova_database=config('database'),
                     nova_username=config('database-user'),
                     nova_hostname=host)

        if config_neutron:
            # XXX: Renaming relations from quantum_* to neutron_* here.
            relation_set(neutron_database=config('neutron-database'),
                         neutron_username=config('neutron-database-user'),
                         neutron_hostname=host)
def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Set up storage (or don't) according to the charm's volume
    configuration.

    Returns the mount point or "ephemeral". before_change and after_change
    are optional functions to be called if the volume configuration changes.
    '''
    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    if config['ephemeral']:
        if os.path.ismount(config['mountpoint']):
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'
    else:
        # persistent storage
        if os.path.ismount(config['mountpoint']):
            mounts = dict(managed_mounts())
            if mounts.get(config['mountpoint']) != config['device']:
                before_change()
                unmount_volume(config)
                mount_volume(config)
                after_change()
        else:
            before_change()
            mount_volume(config)
            after_change()
        return config['mountpoint']
def create_pool(service, name, replicas=2):
    ''' Create a new RADOS pool '''
    if pool_exists(service, name):
        log("Ceph pool {} already exists, skipping creation".format(name),
            level=WARNING)
        return

    # Calculate the number of placement groups based
    # on upstream recommended best practices.
    osds = get_osds(service)
    if osds:
        pgnum = (len(osds) * 100 / replicas)
    else:
        # NOTE(james-page): Default to 200 for older ceph versions
        # which don't support OSD query from cli
        pgnum = 200

    cmd = [
        'ceph', '--id', service,
        'osd', 'pool', 'create', name, str(pgnum)
    ]
    check_call(cmd)

    cmd = [
        'ceph', '--id', service,
        'osd', 'pool', 'set', name, 'size', str(replicas)
    ]
    check_call(cmd)
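
# Rough illustration (not part of the charm) of the placement-group heuristic
# used in create_pool() above: with 6 OSDs and 3 replicas the pool gets
# (6 * 100) / 3 = 200 placement groups; with no OSD information it falls back
# to the fixed default of 200. Integer division is used here purely for the
# illustration.
def example_pg_count(osd_count, replicas):
    # mirrors the arithmetic in create_pool(); purely illustrative
    return (osd_count * 100 // replicas) if osd_count else 200

assert example_pg_count(6, 3) == 200
assert example_pg_count(0, 3) == 200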
def main():
    try:
        hooks.execute(sys.argv)
    except UnregisteredHookError as e:
        log('Unknown hook {} - skipping.'.format(e))
    set_os_workload_status(CONFIGS, REQUIRED_INTERFACES,
                           charm_func=check_optional_relations)
def ensure_permissions(path, user, group, permissions, maxdepth=-1):
    """Ensure permissions for path.

    If path is a file, apply to file and return. If path is a directory,
    apply recursively (if required) to directory contents and return.

    :param user: user name
    :param group: group name
    :param permissions: octal permissions
    :param maxdepth: maximum recursion depth. A negative maxdepth allows
                     infinite recursion and maxdepth=0 means no recursion.
    :returns: None
    """
    if not os.path.exists(path):
        log("File '%s' does not exist - cannot set permissions" % (path),
            level=WARNING)
        return

    _user = pwd.getpwnam(user)
    os.chown(path, _user.pw_uid, grp.getgrnam(group).gr_gid)
    os.chmod(path, permissions)

    if maxdepth == 0:
        log("Max recursion depth reached - skipping further recursion",
            level=DEBUG)
        return
    elif maxdepth > 0:
        maxdepth -= 1

    if os.path.isdir(path):
        contents = glob.glob("%s/*" % (path))
        for c in contents:
            ensure_permissions(c, user=user, group=group,
                               permissions=permissions, maxdepth=maxdepth)
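
# Hedged usage sketch for ensure_permissions() above; the paths, user and
# group are invented for illustration. A negative maxdepth (the default)
# recurses without limit, while maxdepth=0 applies ownership and mode to the
# given path only.
#
# ensure_permissions('/etc/example-charm', user='root', group='root',
#                    permissions=0o750)              # full recursion
# ensure_permissions('/etc/example-charm/keys', user='root', group='root',
#                    permissions=0o600, maxdepth=0)  # this path only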
def _install_oracle_jre_tarball(tarball):
    # Same directory as webupd8 to avoid surprising people, but it could
    # be anything.
    if 'jre-7u' in str(tarball):
        dest = '/usr/lib/jvm/java-7-oracle'
    else:
        dest = '/usr/lib/jvm/java-8-oracle'

    if not os.path.isdir(dest):
        host.mkdir(dest)

    jre_exists = os.path.exists(os.path.join(dest, 'bin', 'java'))

    config = hookenv.config()

    # Unpack the latest tarball if necessary.
    if config.get('oracle_jre_tarball', '') == tarball and jre_exists:
        hookenv.log('Already installed {}'.format(tarball))
    else:
        hookenv.log('Unpacking {}'.format(tarball))
        subprocess.check_call(['tar', '-xz', '-C', dest,
                               '--strip-components=1', '-f', tarball])
        config['oracle_jre_tarball'] = tarball

    # Set alternatives, so /usr/bin/java does what we want.
    for tool in ['java', 'javac']:
        tool_path = os.path.join(dest, 'bin', tool)
        subprocess.check_call(['update-alternatives', '--install',
                               os.path.join('/usr/bin', tool),
                               tool, tool_path, '1'])
        subprocess.check_call(['update-alternatives', '--set',
                               tool, tool_path])
def emit_java_version():
    # Log the version for posterity. Could be useful since Oracle JRE
    # security updates are not automated.
    version = subprocess.check_output(['java', '-version'],
                                      universal_newlines=True)
    for line in version.splitlines():
        hookenv.log('JRE: {}'.format(line))
def stop(self):
    hookenv.log("Stopping HUE and Supervisor process")
    try:
        utils.run_as('hue', 'pkill', '-9', 'supervisor')
        utils.run_as('hue', 'pkill', '-9', 'hue')
    except subprocess.CalledProcessError:
        return
def _fetch_oracle_jre():
    config = hookenv.config()
    url = config.get('private_jre_url', None)
    if url and config.get('retrieved_jre', None) != url:
        filename = os.path.join(hookenv.charm_dir(), 'lib',
                                url.split('/')[-1])
        if not filename.endswith('-linux-x64.tar.gz'):
            helpers.status_set('blocked',
                               'Invalid private_jre_url {}'.format(url))
            raise SystemExit(0)
        helpers.status_set(hookenv.status_get(), 'Downloading Oracle JRE')
        hookenv.log('Oracle JRE URL is {}'.format(url))
        urllib.request.urlretrieve(url, filename)
        config['retrieved_jre'] = url

    pattern = os.path.join(hookenv.charm_dir(), 'lib',
                           'server-jre-?u*-linux-x64.tar.gz')
    tarballs = glob.glob(pattern)
    if not (url or tarballs):
        helpers.status_set('blocked',
                           'private_jre_url not set and no local tarballs.')
        raise SystemExit(0)
    elif not tarballs:
        helpers.status_set('blocked',
                           'Oracle JRE tarball not found ({})'.format(pattern))
        raise SystemExit(0)

    # Latest tarball by filename/version num. Let's hope they don't hit
    # 99 (currently at 76).
    tarball = sorted(tarballs)[-1]
    return tarball
def upgrade_charm():
    # Trigger removal of PPA docker installation if it was previously set.
    set_state('config.changed.install_from_upstream')
    hookenv.atexit(remove_state, 'config.changed.install_from_upstream')

    cleanup_pre_snap_services()
    check_resources_for_upgrade_needed()

    # Remove the RC for nginx ingress if it exists
    if hookenv.config().get('ingress'):
        kubectl_success('delete', 'rc', 'nginx-ingress-controller')

    # Remove gpu.enabled state so we can reconfigure gpu-related kubelet
    # flags, since they can differ between k8s versions
    if is_state('kubernetes-worker.gpu.enabled'):
        remove_state('kubernetes-worker.gpu.enabled')
        try:
            disable_gpu()
        except ApplyNodeLabelFailed:
            # Removing node label failed. Probably the master is unavailable.
            # Proceed with the upgrade in hope GPUs will still be there.
            hookenv.log('Failed to remove GPU labels. Proceed with upgrade.')

    remove_state('kubernetes-worker.cni-plugins.installed')
    remove_state('kubernetes-worker.config.created')
    remove_state('kubernetes-worker.ingress.available')
    remove_state('worker.auth.bootstrapped')
    set_state('kubernetes-worker.restart-needed')
def update_apps(self):
    # Add all services disabled unless we have a joined relation
    # as marked by the respective state
    # Enabled by default: 'filebrowser', 'jobbrowser'
    disabled_services = [
        'beeswax', 'impala', 'security',
        'rdbms', 'jobsub', 'pig',
        'hbase', 'sqoop', 'zookeeper',
        'metastore', 'spark', 'oozie',
        'indexer', 'search']

    for key in get_states():
        if "joined" in key:
            relname = key.split('.')[0]
            if 'hive' in relname:
                disabled_services.remove('beeswax')
                disabled_services.remove('metastore')
            if 'spark' in relname:
                disabled_services.remove('spark')
            if 'oozie' in relname:
                disabled_services.remove('oozie')
            if 'zookeeper' in relname:
                disabled_services.remove('zookeeper')

    hue_config = ''.join((self.dist_config.path('hue'),
                          '/desktop/conf/hue.ini'))
    services_string = ','.join(disabled_services)
    hookenv.log("Disabled apps {}".format(services_string))
    utils.re_edit_in_place(hue_config, {
        r'.*app_blacklist=.*': ''.join(('app_blacklist=', services_string))
    })

    self.check_relations()
def contents_match(self, path):
    """Determines if the file content is the same.

    This is determined by comparing hashsum of the file contents and the
    saved hashsum. If there is no hashsum, then the content cannot be
    sure to be the same so treat them as if they are not the same.
    Otherwise, return True if the hashsums are the same, False if they
    are not the same.

    :param path: the file to check.
    """
    checksum = file_hash(path)

    kv = unitdata.kv()
    stored_checksum = kv.get('hardening:%s' % path)
    if not stored_checksum:
        # If the checksum hasn't been generated, return False to ensure
        # the file is written and the checksum stored.
        log('Checksum for %s has not been calculated.' % path, level=DEBUG)
        return False
    elif stored_checksum != checksum:
        log('Checksum mismatch for %s.' % path, level=DEBUG)
        return False

    return True
def start_worker(kube_api, kube_control, auth_control, cni):
    ''' Start kubelet using the provided API and DNS info.'''
    servers = get_kube_api_servers(kube_api)
    # Note that the DNS server doesn't necessarily exist at this point. We
    # know what its IP will eventually be, though, so we can go ahead and
    # configure kubelet with that info. This ensures that early pods are
    # configured with the correct DNS even though the server isn't ready yet.

    dns = kube_control.get_dns()
    ingress_ip = get_ingress_address(kube_control)
    cluster_cidr = cni.get_config()['cidr']

    if cluster_cidr is None:
        hookenv.log('Waiting for cluster cidr.')
        return

    creds = db.get('credentials')
    data_changed('kube-control.creds', creds)

    create_config(random.choice(servers), creds)
    configure_kubelet(dns, ingress_ip)
    configure_kube_proxy(servers, cluster_cidr)
    set_state('kubernetes-worker.config.created')
    restart_unit_services()
    update_kubelet_status()
    set_state('kubernetes-worker.label-config-required')
    remove_state('kubernetes-worker.restart-needed')
def is_compliant(self, path):
    """Checks if the directory is compliant.

    Used to determine if the path specified and all of its children
    directories are in compliance with the check itself.

    :param path: the directory path to check
    :returns: True if the directory tree is compliant, otherwise False.
    """
    if not os.path.isdir(path):
        log('Path specified %s is not a directory.' % path, level=ERROR)
        raise ValueError("%s is not a directory." % path)

    if not self.recursive:
        return super(DirectoryPermissionAudit, self).is_compliant(path)

    compliant = True
    for root, dirs, _ in os.walk(path):
        if len(dirs) > 0:
            continue

        if not super(DirectoryPermissionAudit, self).is_compliant(root):
            compliant = False
            continue

    return compliant
def templates_match(self, path):
    """Determines if the template files are the same.

    The template file equality is determined by the hashsum of the
    template files themselves. If there is no hashsum, then the content
    cannot be sure to be the same so treat it as if they changed.
    Otherwise, return whether or not the hashsums are the same.

    :param path: the path to check
    :returns: boolean
    """
    template_path = get_template_path(self.template_dir, path)
    key = 'hardening:template:%s' % template_path
    template_checksum = file_hash(template_path)
    kv = unitdata.kv()
    stored_tmplt_checksum = kv.get(key)
    if not stored_tmplt_checksum:
        kv.set(key, template_checksum)
        kv.flush()
        log('Saved template checksum for %s.' % template_path, level=DEBUG)
        # Since we don't have a template checksum, then assume it doesn't
        # match and return that the template is different.
        return False
    elif stored_tmplt_checksum != template_checksum:
        kv.set(key, template_checksum)
        kv.flush()
        log('Updated template checksum for %s.' % template_path, level=DEBUG)
        return False

    # Here the template hasn't changed based upon the calculated
    # checksum of the template and what was previously stored.
    return True
def user(self, name):
    try:
        user = pwd.getpwnam(name)
    except KeyError:
        log('Unknown user %s' % name, level=ERROR)
        user = None
    self._user = user
def is_compliant(self, path):
    """Checks if the path is in compliance.

    Used to determine if the path specified meets the necessary
    requirements to be in compliance with the check itself.

    :param path: the file path to check
    :returns: True if the path is compliant, False otherwise.
    """
    stat = self._get_stat(path)
    user = self.user
    group = self.group

    compliant = True
    if stat.st_uid != user.pw_uid or stat.st_gid != group.gr_gid:
        log('File %s is not owned by %s:%s.' % (path, user.pw_name,
                                                group.gr_name),
            level=INFO)
        compliant = False

    # POSIX refers to the st_mode bits as corresponding to both the
    # file type and file permission bits, where the least significant 12
    # bits (o7777) are the suid (11), sgid (10), sticky bits (9), and the
    # file permission bits (8-0)
    perms = stat.st_mode & 0o7777
    if perms != self.mode:
        log('File %s has incorrect permissions, currently set to %s' %
            (path, oct(stat.st_mode & 0o7777)), level=INFO)
        compliant = False

    return compliant
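
# Quick illustration (not charm code) of the 0o7777 mask used above:
# st_mode mixes the file-type bits with the permission, suid, sgid and
# sticky bits, and the mask keeps only the lower twelve bits that the
# audit compares against the expected mode.
import stat as stat_module

example_mode = 0o100644                        # regular file, rw-r--r--
assert example_mode & 0o7777 == 0o644          # type bits stripped
assert stat_module.S_IFMT(example_mode) == stat_module.S_IFREG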
def install():
    status_set('maintenance', 'Executing pre-install')
    execd_preinstall()
    src = config('openstack-origin')
    if (lsb_release()['DISTRIB_CODENAME'] == 'precise' and
            src == 'distro'):
        src = 'cloud:precise-icehouse'
    configure_installation_source(src)
    status_set('maintenance', 'Installing apt packages')
    apt_update(fatal=True)
    apt_install('python-six', fatal=True)  # Force upgrade
    if valid_plugin():
        apt_install(filter_installed_packages(get_early_packages()),
                    fatal=True)
        apt_install(filter_installed_packages(get_packages()),
                    fatal=True)
    else:
        message = 'Please provide a valid plugin config'
        log(message, level=ERROR)
        status_set('blocked', message)
        sys.exit(1)

    # Legacy HA for Icehouse
    update_legacy_ha_files()

    # Install systemd overrides to remove service startup race between
    # n-gateway and n-cloud-controller services.
    install_systemd_override()
def _run_apt_command(cmd, fatal=False):
    """Run an APT command, checking output and retrying if the fatal flag
    is set to True.

    :param: cmd: str: The apt command to run.
    :param: fatal: bool: Whether the command's output should be checked and
            retried.
    """
    env = os.environ.copy()

    if 'DEBIAN_FRONTEND' not in env:
        env['DEBIAN_FRONTEND'] = 'noninteractive'

    if fatal:
        retry_count = 0
        result = None

        # If the command is considered "fatal", we need to retry if the apt
        # lock was not acquired.
        while result is None or result == APT_NO_LOCK:
            try:
                result = subprocess.check_call(cmd, env=env)
            except subprocess.CalledProcessError as e:
                retry_count = retry_count + 1
                if retry_count > APT_NO_LOCK_RETRY_COUNT:
                    raise
                result = e.returncode
                log("Couldn't acquire DPKG lock. Will retry in {} seconds."
                    "".format(APT_NO_LOCK_RETRY_DELAY))
                time.sleep(APT_NO_LOCK_RETRY_DELAY)
    else:
        subprocess.call(cmd, env=env)
def del_bridge_port(name, port):
    ''' Delete a port from the named openvswitch bridge '''
    log('Deleting port {} from bridge {}'.format(port, name))
    subprocess.check_call(["ovs-vsctl", "--", "--if-exists", "del-port",
                           name, port])
    subprocess.check_call(["ip", "link", "set", port, "down"])
    subprocess.check_call(["ip", "link", "set", port, "promisc", "off"])
def add_ovsbridge_linuxbridge(name, bridge):
    ''' Add linux bridge to the named openvswitch bridge

    :param name: Name of ovs bridge to be added to Linux bridge
    :param bridge: Name of Linux bridge to be added to ovs bridge
    :returns: True if veth is added between ovs bridge and linux bridge,
              False otherwise
    '''
    try:
        import netifaces
    except ImportError:
        if six.PY2:
            apt_install('python-netifaces', fatal=True)
        else:
            apt_install('python3-netifaces', fatal=True)
        import netifaces

    ovsbridge_port = "veth-" + name
    linuxbridge_port = "veth-" + bridge
    log('Adding linuxbridge {} to ovsbridge {}'.format(bridge, name),
        level=INFO)
    interfaces = netifaces.interfaces()
    for interface in interfaces:
        if interface == ovsbridge_port or interface == linuxbridge_port:
            log('Interface {} already exists'.format(interface), level=INFO)
            return

    with open('/etc/network/interfaces.d/{}.cfg'.format(
            linuxbridge_port), 'w') as config:
        config.write(BRIDGE_TEMPLATE.format(
            linuxbridge_port=linuxbridge_port,
            ovsbridge_port=ovsbridge_port,
            bridge=bridge))

    subprocess.check_call(["ifup", linuxbridge_port])
    add_bridge_port(name, linuxbridge_port)
def is_request_complete_for_rid(request, rid):
    """Check if a given request has been completed on the given relation

    @param request: A CephBrokerRq object
    @param rid: Relation ID
    """
    broker_key = get_broker_rsp_key()
    for unit in related_units(rid):
        rdata = relation_get(rid=rid, unit=unit)
        if rdata.get(broker_key):
            rsp = CephBrokerRsp(rdata.get(broker_key))
            if rsp.request_id == request.request_id:
                if not rsp.exit_code:
                    return True
        else:
            # The remote unit sent no reply targeted at this unit so either
            # the remote ceph cluster does not support unit targeted replies
            # or it has not processed our request yet.
            if rdata.get('broker_rsp'):
                request_data = json.loads(rdata['broker_rsp'])
                if request_data.get('request-id'):
                    log('Ignoring legacy broker_rsp without unit key as '
                        'remote service supports unit specific replies',
                        level=DEBUG)
                else:
                    log('Using legacy broker_rsp as remote service does not '
                        'support unit specific replies', level=DEBUG)
                    rsp = CephBrokerRsp(rdata['broker_rsp'])
                    if not rsp.exit_code:
                        return True

    return False
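
# Illustrative sketch (unit names and payloads invented) of the two reply
# shapes is_request_complete_for_rid() has to cope with: a unit-targeted key
# from newer ceph charms, and the shared legacy 'broker_rsp' key, which may
# or may not carry a request id.
unit_targeted_reply = {
    'broker-rsp-glance-0': '{"exit-code": 0, "request-id": "abc123"}',
}
legacy_reply = {
    'broker_rsp': '{"exit-code": 0}',   # no request id: legacy remote end
}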
def apply_node_labels():
    ''' Parse the labels configuration option and apply the labels to the
    node. '''
    # scrub and try to format an array from the configuration option
    config = hookenv.config()
    user_labels = _parse_labels(config.get('labels'))

    # For diffing sake, iterate the previous label set
    if config.previous('labels'):
        previous_labels = _parse_labels(config.previous('labels'))
        hookenv.log('previous labels: {}'.format(previous_labels))
    else:
        # this handles first time run if there is no previous labels config
        previous_labels = _parse_labels("")

    # Calculate label removal
    for label in previous_labels:
        if label not in user_labels:
            hookenv.log('Deleting node label {}'.format(label))
            _apply_node_label(label, delete=True)
        # if the label is in user labels we do nothing here, it will get set
        # during the atomic update below.

    # Atomically set a label
    for label in user_labels:
        _apply_node_label(label, overwrite=True)

    # Set label for application name
    _apply_node_label('juju-application={}'.format(hookenv.service_name()),
                      overwrite=True)
def pip_install(package, fatal=False, upgrade=False, venv=None,
                constraints=None, **options):
    """Install a python package"""
    if venv:
        venv_python = os.path.join(venv, 'bin/pip')
        command = [venv_python, "install"]
    else:
        command = ["install"]

    available_options = ('proxy', 'src', 'log', 'index-url', )
    for option in parse_options(options, available_options):
        command.append(option)

    if upgrade:
        command.append('--upgrade')

    if constraints:
        command.extend(['-c', constraints])

    if isinstance(package, list):
        command.extend(package)
    else:
        command.append(package)

    log("Installing {} package with options: {}".format(package, command))
    if venv:
        subprocess.check_call(command)
    else:
        pip_execute(command)
def _apply_node_label(label, delete=False, overwrite=False):
    ''' Invoke kubectl to apply node label changes '''
    nodename = get_node_name()

    # TODO: Make this part of the kubectl calls instead of a special string
    cmd_base = 'kubectl --kubeconfig={0} label node {1} {2}'

    if delete is True:
        label_key = label.split('=')[0]
        cmd = cmd_base.format(kubeconfig_path, nodename, label_key)
        cmd = cmd + '-'
    else:
        cmd = cmd_base.format(kubeconfig_path, nodename, label)
        if overwrite:
            cmd = '{} --overwrite'.format(cmd)

    cmd = cmd.split()

    deadline = time.time() + 180
    while time.time() < deadline:
        code = subprocess.call(cmd)
        if code == 0:
            break
        hookenv.log('Failed to apply label %s, exit code %d. Will retry.' % (
            label, code))
        time.sleep(1)
    else:
        msg = 'Failed to apply label %s' % label
        raise ApplyNodeLabelFailed(msg)
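
# For illustration only (kubeconfig path, node name and label invented): the
# commands _apply_node_label() ends up running look like the ones below.
# kubectl removes a label when the key is suffixed with '-'.
#
#   kubectl --kubeconfig=/root/.kube/config label node worker-0 \
#       gpu=true --overwrite          # set / overwrite a label
#   kubectl --kubeconfig=/root/.kube/config label node worker-0 gpu-
#                                     # delete the 'gpu' label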
def update_nova_consoleauth_config():
    """
    Configure nova-consoleauth pacemaker resources
    """
    relids = relation_ids('ha')
    if len(relids) == 0:
        log('Related to {} ha services'.format(len(relids)), level='DEBUG')
        ha_relid = None
        data = {}
    else:
        ha_relid = relids[0]
        data = relation_get(rid=ha_relid) or {}

    # initialize keys in case this is a new dict
    data.setdefault('delete_resources', [])
    for k in ['colocations', 'init_services', 'resources', 'resource_params']:
        data.setdefault(k, {})

    if config('single-nova-consoleauth') and console_attributes('protocol'):
        for item in ['vip_consoleauth', 'res_nova_consoleauth']:
            try:
                data['delete_resources'].remove(item)
            except ValueError:
                pass  # nothing to remove, we are good

        # the new pcmkr resources have to be added to the existing ones
        data['colocations']['vip_consoleauth'] = COLO_CONSOLEAUTH
        data['init_services']['res_nova_consoleauth'] = 'nova-consoleauth'
        data['resources']['res_nova_consoleauth'] = AGENT_CONSOLEAUTH
        data['resource_params']['res_nova_consoleauth'] = AGENT_CA_PARAMS

        for rid in relation_ids('ha'):
            relation_set(rid, **data)

        # nova-consoleauth will be managed by pacemaker, so mark it as manual
        if relation_ids('ha'):
            with open(NOVA_CONSOLEAUTH_OVERRIDE, 'w') as fp:
                fp.write('manual\n')
                fp.flush()

    elif (not config('single-nova-consoleauth') and
          console_attributes('protocol')):
        for item in ['vip_consoleauth', 'res_nova_consoleauth']:
            if item not in data['delete_resources']:
                data['delete_resources'].append(item)

        # remove them from the rel, so they aren't recreated when the hook
        # is recreated
        data['colocations'].pop('vip_consoleauth', None)
        data['init_services'].pop('res_nova_consoleauth', None)
        data['resources'].pop('res_nova_consoleauth', None)
        data['resource_params'].pop('res_nova_consoleauth', None)

        for rid in relation_ids('ha'):
            relation_set(rid, **data)

        try:
            os.remove(NOVA_CONSOLEAUTH_OVERRIDE)
        except FileNotFoundError as e:
            log(str(e), level='DEBUG')
def install_hadoop(self):
    hadoop_version = self.dist_config.hadoop_version
    try:
        jujuresources.install('hadoop-%s-%s' % (hadoop_version,
                                                self.cpu_arch),
                              destination=self.dist_config.path('hadoop'),
                              skip_top_level=True)
    except KeyError:
        hookenv.log("Falling back to non-version specific download of "
                    "hadoop...")
        jujuresources.install('hadoop-%s' % (self.cpu_arch),
                              destination=self.dist_config.path('hadoop'),
                              skip_top_level=True)

    # Install our lzo compression codec if it's defined in resources.yaml
    try:
        jujuresources.install('hadoop-lzo-%s' % self.cpu_arch,
                              destination=self.dist_config.path('hadoop'),
                              skip_top_level=False)
        unitdata.kv().set('hadoop.lzo.installed', True)
    except KeyError:
        msg = ("The hadoop-lzo-%s resource was not found. "
               "LZO compression will not be available." % self.cpu_arch)
        hookenv.log(msg)
def changed(self):
    hookenv.log('Data: {}'.format({
        'local_spec': self.local_spec(),
        'remote_spec': self.remote_spec(),
        'clustername': self.clustername(),
        'namenodes': self.namenodes(),
        'port': self.port(),
        'webhdfs_port': self.webhdfs_port(),
        'hosts_map': self.hosts_map(),
        'local_hostname': self.local_hostname(),
    }))
    conv = self.conversation()
    available = all([
        self.remote_spec() is not None,
        self.hosts_map(),
        self.clustername(),
        self.namenodes(),
        self.port(),
        self.webhdfs_port(),
        self.ssh_key()])
    spec_mismatch = available and not self._spec_match()
    visible = self.local_hostname() in self.hosts_map().values()
    ready = available and visible

    conv.toggle_state('{relation_name}.spec.mismatch', spec_mismatch)
    conv.toggle_state('{relation_name}.ready', ready and not spec_mismatch)

    hookenv.log('States: {}'.format(set(get_states().keys())))
def db_changed():
    if 'shared-db' not in CONFIGS.complete_contexts():
        log('shared-db relation incomplete. Peer not ready?')
        return
    CONFIGS.write_all()
    leader_init_db_if_ready()
def write(self):
    try:
        nagios_uid = pwd.getpwnam('nagios').pw_uid
        nagios_gid = grp.getgrnam('nagios').gr_gid
    except Exception:
        log("Nagios user not set up, nrpe checks not updated")
        return

    if not os.path.exists(NRPE.nagios_logdir):
        os.mkdir(NRPE.nagios_logdir)
        os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

    nrpe_monitors = {}
    monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
    for nrpecheck in self.checks:
        nrpecheck.write(self.nagios_context, self.hostname,
                        self.nagios_servicegroups)
        nrpe_monitors[nrpecheck.shortname] = {
            "command": nrpecheck.command,
        }

    service('restart', 'nagios-nrpe-server')

    for rid in relation_ids("local-monitors"):
        relation_set(relation_id=rid, monitors=yaml.dump(monitors))
def render_and_launch_ingress():
    ''' Launch the Kubernetes ingress controller & default backend (404) '''
    config = hookenv.config()

    # need to test this in case we get in
    # here from a config change to the image
    if not config.get('ingress'):
        return

    context = {}
    context['arch'] = arch()
    addon_path = '/root/cdk/addons/{}'
    context['juju_application'] = hookenv.service_name()

    # If present, workers will get the ingress containers from the configured
    # registry. Otherwise, we'll set an appropriate upstream image registry.
    registry_location = get_registry_location()

    context['defaultbackend_image'] = config.get('default-backend-image')
    if (context['defaultbackend_image'] == "" or
            context['defaultbackend_image'] == "auto"):
        if registry_location:
            backend_registry = registry_location
        else:
            backend_registry = 'k8s.gcr.io'
        if context['arch'] == 's390x':
            context['defaultbackend_image'] = \
                "{}/defaultbackend-s390x:1.4".format(backend_registry)
        elif context['arch'] == 'arm64':
            context['defaultbackend_image'] = \
                "{}/defaultbackend-arm64:1.5".format(backend_registry)
        else:
            context['defaultbackend_image'] = \
                "{}/defaultbackend-amd64:1.5".format(backend_registry)

    # Render the ingress daemon set controller manifest
    context['ssl_chain_completion'] = config.get(
        'ingress-ssl-chain-completion')
    context['enable_ssl_passthrough'] = config.get('ingress-ssl-passthrough')
    context['ingress_image'] = config.get('nginx-image')
    if context['ingress_image'] == "" or context['ingress_image'] == "auto":
        if registry_location:
            nginx_registry = registry_location
        else:
            nginx_registry = 'quay.io'
        images = {'amd64': 'kubernetes-ingress-controller/nginx-ingress-controller-amd64:0.25.1',  # noqa
                  'arm64': 'kubernetes-ingress-controller/nginx-ingress-controller-arm64:0.25.1',  # noqa
                  's390x': 'kubernetes-ingress-controller/nginx-ingress-controller-s390x:0.20.0',  # noqa
                  'ppc64el': 'kubernetes-ingress-controller/nginx-ingress-controller-ppc64le:0.20.0',  # noqa
                  }
        context['ingress_image'] = '{}/{}'.format(
            nginx_registry,
            images.get(context['arch'], images['amd64']))

    kubelet_version = get_version('kubelet')
    if kubelet_version < (1, 9):
        context['daemonset_api_version'] = 'extensions/v1beta1'
        context['deployment_api_version'] = 'extensions/v1beta1'
    elif kubelet_version < (1, 16):
        context['daemonset_api_version'] = 'apps/v1beta2'
        context['deployment_api_version'] = 'extensions/v1beta1'
    else:
        context['daemonset_api_version'] = 'apps/v1'
        context['deployment_api_version'] = 'apps/v1'

    manifest = addon_path.format('ingress-daemon-set.yaml')
    render('ingress-daemon-set.yaml', manifest, context)
    hookenv.log('Creating the ingress daemon set.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create ingress controller. Will attempt again next update.')  # noqa
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    # Render the default http backend (404) deployment manifest
    # needs to happen after ingress-daemon-set since that sets up the
    # namespace
    manifest = addon_path.format('default-http-backend.yaml')
    render('default-http-backend.yaml', manifest, context)
    hookenv.log('Creating the default http backend.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create default-http-backend. Will attempt again next update.')  # noqa
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    set_state('kubernetes-worker.ingress.available')
    hookenv.open_port(80)
    hookenv.open_port(443)
def main():
    try:
        hooks.execute(sys.argv)
    except UnregisteredHookError as e:
        log("Unknown hook {} - skipping.".format(e))
def register(self):
    """Register this GitLab runner with the GitLab CI server."""
    if self.gitlab_token and self.gitlab_uri:
        hookenv.log("Registering GitLab runner with {}".format(
            self.gitlab_uri))
        hookenv.status_set("maintenance", "Registering with GitLab")

        # Docker executor
        command = [
            "/usr/bin/gitlab-runner",
            "register",
            "--non-interactive",
            "--url",
            "{}".format(self.gitlab_uri),
            "--registration-token",
            "{}".format(self.gitlab_token),
            "--name",
            "{}-docker".format(self.hostname),
            "--tag-list",
            "docker",
            "--executor",
            "docker",
            "--docker-image",
            "ubuntu:latest",
        ]
        subprocess.check_call(command, stderr=subprocess.STDOUT)

        # LXD executor
        command = [
            "/usr/bin/gitlab-runner",
            "register",
            "--non-interactive",
            "--url",
            "{}".format(self.gitlab_uri),
            "--registration-token",
            "{}".format(self.gitlab_token),
            "--name",
            "{}-lxd".format(self.hostname),
            "--tag-list",
            "lxd",
            "--executor",
            "custom",
            "--builds-dir",
            "/builds",
            "--cache-dir",
            "/cache",
            "--custom-run-exec",
            "/opt/lxd-executor/run.sh",
            "--custom-prepare-exec",
            "/opt/lxd-executor/prepare.sh",
            "--custom-cleanup-exec",
            "/opt/lxd-executor/cleanup.sh",
        ]
        subprocess.check_call(command, stderr=subprocess.STDOUT)
    else:
        hookenv.log(
            "Could not register gitlab runner due to missing token or uri")
        hookenv.status_set("blocked",
                           "Unregistered due to missing token or URI")
        return False

    hookenv.status_set(
        "active",
        "Registered with {}".format(self.gitlab_uri.lstrip("http://")))
    return True
def __call__(self):
    bind_port = config('bind-port')
    workers = config('workers')
    if workers == 0:
        import multiprocessing
        workers = multiprocessing.cpu_count()
    if config('prefer-ipv6'):
        proxy_ip = ('[{}]'.format(
            get_ipv6_addr(exc_list=[config('vip')])[0]))
        memcached_ip = 'ip6-localhost'
    else:
        proxy_ip = get_host_ip(unit_get('private-address'))
        memcached_ip = get_host_ip(unit_get('private-address'))

    ctxt = {
        'proxy_ip': proxy_ip,
        'memcached_ip': memcached_ip,
        'bind_port': determine_api_port(bind_port, singlenode_mode=True),
        'workers': workers,
        'operator_roles': config('operator-roles'),
        'delay_auth_decision': config('delay-auth-decision'),
        'node_timeout': config('node-timeout'),
        'recoverable_node_timeout': config('recoverable-node-timeout'),
        'log_headers': config('log-headers'),
        'statsd_host': config('statsd-host'),
        'statsd_port': config('statsd-port'),
        'statsd_sample_rate': config('statsd-sample-rate'),
        'static_large_object_segments': config(
            'static-large-object-segments')
    }

    admin_key = leader_get('swauth-admin-key')
    if admin_key is not None:
        ctxt['swauth_admin_key'] = admin_key

    if config('debug'):
        ctxt['log_level'] = 'DEBUG'
    else:
        ctxt['log_level'] = 'INFO'

    # Instead of duplicating code let's use charm-helpers to set signing_dir
    # TODO(hopem): refactor this context handler to use charm-helpers code.
    _ctxt = IdentityServiceContext(service='swift', service_user='******')()
    signing_dir = _ctxt.get('signing_dir')
    if signing_dir:
        ctxt['signing_dir'] = signing_dir

    ctxt['ssl'] = False

    auth_type = config('auth-type')
    ctxt['auth_type'] = auth_type

    auth_host = config('keystone-auth-host')
    admin_user = config('keystone-admin-user')
    admin_password = config('keystone-admin-password')
    if (auth_type == 'keystone' and auth_host and
            admin_user and admin_password):
        log('Using user-specified Keystone configuration.')
        ks_auth = {
            'auth_type': 'keystone',
            'auth_protocol': config('keystone-auth-protocol'),
            'keystone_host': auth_host,
            'auth_port': config('keystone-auth-port'),
            'service_user': admin_user,
            'service_password': admin_password,
            'service_tenant': config('keystone-admin-tenant-name'),
        }
        ctxt.update(ks_auth)

    for relid in relation_ids('identity-service'):
        log('Using Keystone configuration from identity-service.')
        for unit in related_units(relid):
            ks_auth = {
                'auth_type': 'keystone',
                'auth_protocol': relation_get('auth_protocol',
                                              unit, relid) or 'http',
                'service_protocol': relation_get('service_protocol',
                                                 unit, relid) or 'http',
                'keystone_host': relation_get('auth_host', unit, relid),
                'service_host': relation_get('service_host', unit, relid),
                'auth_port': relation_get('auth_port', unit, relid),
                'service_user': relation_get('service_username', unit, relid),
                'service_password': relation_get('service_password',
                                                 unit, relid),
                'service_tenant': relation_get('service_tenant', unit, relid),
                'service_port': relation_get('service_port', unit, relid),
                'admin_token': relation_get('admin_token', unit, relid),
                'api_version': relation_get('api_version', unit, relid) or '2',
            }
            if ks_auth['api_version'] == '3':
                ks_auth['admin_domain_id'] = relation_get(
                    'admin_domain_id', unit, relid)
                ks_auth['service_tenant_id'] = relation_get(
                    'service_tenant_id', unit, relid)
                ks_auth['admin_domain_name'] = relation_get(
                    'service_domain', unit, relid)
                ks_auth['admin_tenant_name'] = relation_get(
                    'service_tenant', unit, relid)
            ctxt.update(ks_auth)

    if config('prefer-ipv6'):
        for key in ['keystone_host', 'service_host']:
            host = ctxt.get(key)
            if host:
                ctxt[key] = format_ipv6_addr(host)

    return ctxt
def amqp_changed():
    if 'amqp' not in CONFIGS.complete_contexts():
        log('amqp relation incomplete. Peer not ready?')
        return
    CONFIGS.write(ck_utils.CLOUDKITTY_CONF)
def config_changed():
    configs = register_configs()
    configs.write_all()
    if config('cplane-version') == "1.3.5":
        import pkg_resources
        NEUTRON_ENTRY_POINT = (
            "/usr/lib/python2.7/dist-packages/neutron-" +
            pkg_resources.get_distribution('neutron').version +
            ".egg-info/entry_points.txt")
        cplane_config(neutron_config, NEUTRON_ENTRY_POINT)

    # Each of these options holds a comma-separated list of
    # "interface=value" pairs; apply every requested per-interface
    # setting in turn.
    iface_options = [
        ('mtu', 'intf-mtu'),
        ('tso', 'tso-flag'),
        ('gso', 'gso-flag'),
        ('rx', 'rx-flag'),
        ('tx', 'tx-flag'),
        ('sg', 'sg-flag'),
        ('ufo', 'ufo-flag'),
        ('gro', 'gro-flag'),
        ('lro', 'lro-flag'),
    ]
    for setting, option in iface_options:
        value_string = config(option)
        if not value_string:
            continue
        for line in value_string.split(','):
            interface = line.split('=')
            log("Change request for {} for interface {} = {}".format(
                setting, interface[0], interface[1]))
            change_iface_config(interface[0], setting, interface[1])

    add_controller_ip()
def restart_unit_services():
    '''Restart worker services.'''
    hookenv.log('Restarting kubelet and kube-proxy.')
    services = ['kube-proxy', 'kubelet']
    for service in services:
        service_restart('snap.%s.daemon' % service)
def get_context():
    ctx = {}
    ctx["module"] = MODULE
    ctx["log_level"] = config.get("log-level", "SYS_NOTICE")
    ctx["container_registry"] = config.get("docker-registry")
    ctx["contrail_version_tag"] = config.get("image-tag")
    ctx["contrail_version"] = common_utils.get_contrail_version()

    # self IP-s
    kubemanager_ip_list = list()
    for rid in relation_ids("kubernetes-master-cluster"):
        for unit in related_units(rid):
            ip = relation_get("private-address", unit, rid)
            if ip:
                kubemanager_ip_list.append(ip)
    # add its own ip address
    kubemanager_ip_list.append(common_utils.get_ip())
    ctx["kubemanager_servers"] = kubemanager_ip_list

    # get contrail configuration from relation
    ips = common_utils.json_loads(config.get("controller_ips"), list())
    data_ips = common_utils.json_loads(config.get("controller_data_ips"),
                                       list())
    ctx["controller_servers"] = ips
    ctx["control_servers"] = data_ips
    ips = common_utils.json_loads(config.get("analytics_servers"), list())
    ctx["analytics_servers"] = ips

    # TODO: add ssl
    ctx["cluster_name"] = config.get("cluster_name")
    ctx["cluster_project"] = config.get("cluster_project")
    ctx["cluster_network"] = config.get("cluster_network")
    ctx["pod_subnets"] = config.get("pod_subnets")
    ctx["ip_fabric_subnets"] = config.get("ip_fabric_subnets")
    ctx["service_subnets"] = config.get("service_subnets")
    ctx["ip_fabric_forwarding"] = config.get("ip_fabric_forwarding")
    ctx["ip_fabric_snat"] = config.get("ip_fabric_snat")
    ctx["host_network_service"] = config.get("host_network_service")
    ctx["public_fip_pool"] = config.get("public_fip_pool")

    ctx.update(common_utils.json_loads(config.get("orchestrator_info"),
                                       dict()))
    if not ctx.get("cloud_orchestrators"):
        ctx["cloud_orchestrators"] = (
            list(ctx.get("cloud_orchestrator"))
            if ctx.get("cloud_orchestrator") else list())

    # TODO: switch to use context for this
    ctx["kube_manager_token"] = leader_get("kube_manager_token")
    if config.get("kubernetes_api_hostname") and config.get(
            "kubernetes_api_secure_port"):
        ctx["kubernetes_api_server"] = config.get("kubernetes_api_hostname")
        ctx["kubernetes_api_secure_port"] = config.get(
            "kubernetes_api_secure_port")
    else:
        ctx["kubernetes_api_server"] = config.get("kubernetes_api_server")
        ctx["kubernetes_api_secure_port"] = config.get("kubernetes_api_port")

    ctx["nested_mode"] = config.get("nested_mode")
    if ctx["nested_mode"]:
        # TODO: create KUBERNETES_NESTED_VROUTER_VIP link-local services
        # in Contrail via config API
        ctx["nested_mode_config"] = common_utils.json_loads(
            config.get("nested_mode_config"), dict())

    ctx["config_analytics_ssl_available"] = (
        common_utils.is_config_analytics_ssl_available())
    ctx["logging"] = docker_utils.render_logging()

    log("CTX: {}".format(ctx))

    ctx.update(common_utils.json_loads(config.get("auth_info"), dict()))
    return ctx
def launch_default_ingress_controller():
    ''' Launch the Kubernetes ingress controller & default backend (404) '''
    context = {}
    context['arch'] = arch()
    addon_path = '/root/cdk/addons/{}'

    # Render the default http backend (404) replicationcontroller manifest
    manifest = addon_path.format('default-http-backend.yaml')
    render('default-http-backend.yaml', manifest, context)
    hookenv.log('Creating the default http backend.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create default-http-backend. Will attempt '
                    'again next update.')
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    # Render the ingress replication controller manifest
    context['ingress_image'] = \
        "gcr.io/google_containers/nginx-ingress-controller:0.9.0-beta.13"
    if arch() == 's390x':
        context['ingress_image'] = \
            "docker.io/cdkbot/nginx-ingress-controller-s390x:0.9.0-beta.13"
    manifest = addon_path.format('ingress-replication-controller.yaml')
    render('ingress-replication-controller.yaml', manifest, context)
    hookenv.log('Creating the ingress replication controller.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create ingress controller. Will attempt '
                    'again next update.')
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    set_state('kubernetes-worker.ingress.available')
    hookenv.open_port(80)
    hookenv.open_port(443)
def restart_on_munge_change2():
    hookenv.log(
        'restart_on_munge_change2(): file %s modified, restarting due to flag'
        % munge.MUNGE_KEY_PATH)
    host.service_restart(munge.MUNGE_SERVICE)
    flags.clear_flag('munge.changed_key_file')
#
# Copyright 2016 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

sys.path.append('hooks')

from subprocess import CalledProcessError
from charmhelpers.core.hookenv import action_get, log, action_fail
from charmhelpers.contrib.storage.linux.ceph import rename_pool

if __name__ == '__main__':
    name = action_get("pool-name")
    new_name = action_get("new-name")
    try:
        rename_pool(service='admin', old_name=name, new_name=new_name)
    except CalledProcessError as e:
        log(e)
        action_fail("Renaming pool failed with message: {}".format(e.message))
def kubectl(*args):
    ''' Run a kubectl cli command with a config file. Returns stdout and
    throws an error if the command fails. '''
    command = ['kubectl', '--kubeconfig=' + kubeclientconfig_path] + list(args)
    hookenv.log('Executing {}'.format(command))
    return check_output(command)
def update_status():
    log('Updating status.')
def start_charm():
    if not hookenv.is_leader():
        hookenv.log("This unit is not a leader.")
        return False

    layer.status.maintenance('configuring container')

    image_info = layer.docker_resource.get_info('oci-image')
    service_name = hookenv.service_name()

    port = hookenv.config('port')

    profiles = endpoint_from_name('kubeflow-profiles').services()[0]
    profiles_host = profiles['service_name']
    profiles_port = profiles['hosts'][0]['port']
    model = os.environ['JUJU_MODEL_NAME']

    layer.caas_base.pod_spec_set({
        'version': 2,
        'serviceAccount': {
            'global': True,
            'rules': [
                {
                    'apiGroups': [''],
                    'resources': ['namespaces'],
                    'verbs': ['get', 'list', 'create', 'delete'],
                },
                {
                    'apiGroups': ['kubeflow.org'],
                    'resources': ['notebooks', 'poddefaults'],
                    'verbs': ['get', 'list', 'create', 'delete'],
                },
                {
                    'apiGroups': [''],
                    'resources': ['persistentvolumeclaims'],
                    'verbs': ['create', 'delete', 'get', 'list'],
                },
                {
                    'apiGroups': ['storage.k8s.io'],
                    'resources': ['storageclasses'],
                    'verbs': ['get', 'list', 'watch'],
                },
                {
                    'apiGroups': [''],
                    'resources': ['pods', 'pods/log', 'secrets', 'services'],
                    'verbs': ['*'],
                },
                {
                    'apiGroups': ['', 'apps', 'extensions'],
                    'resources': ['deployments', 'replicasets'],
                    'verbs': ['*'],
                },
                {
                    'apiGroups': ['kubeflow.org'],
                    'resources': ['*'],
                    'verbs': ['*'],
                },
                {
                    'apiGroups': ['batch'],
                    'resources': ['jobs'],
                    'verbs': ['*'],
                },
            ],
        },
        'service': {
            'annotations': {
                'getambassador.io/config': yaml.dump_all([{
                    'apiVersion': 'ambassador/v0',
                    'kind': 'Mapping',
                    'name': 'jupyter-web',
                    'prefix': '/jupyter/',
                    'service': f'{service_name}:{port}',
                    'timeout_ms': 30000,
                    'add_request_headers': {
                        'x-forwarded-prefix': '/jupyter'
                    },
                }])
            }
        },
        'containers': [{
            'name': 'jupyter-web',
            'imageDetails': {
                'imagePath': image_info.registry_path,
                'username': image_info.username,
                'password': image_info.password,
            },
            'ports': [{'name': 'http', 'containerPort': port}],
            'config': {
                'USERID_HEADER': 'kubeflow-userid',
                'USERID_PREFIX': '',
                'KFAM': f'{profiles_host}.{model}.svc.cluster.local:{profiles_port}',
            },
            'files': [{
                'name': 'configs',
                'mountPath': '/etc/config',
                'files': {
                    Path(filename).name: Path(filename).read_text()
                    for filename in glob('files/*')
                },
            }],
        }],
    })

    layer.status.maintenance('creating container')
    set_flag('charm.started')
def launch_default_ingress_controller():
    ''' Launch the Kubernetes ingress controller & default backend (404) '''
    config = hookenv.config()

    # need to test this in case we get in
    # here from a config change to the image
    if not config.get('ingress'):
        return

    context = {}
    context['arch'] = arch()
    addon_path = '/root/cdk/addons/{}'

    context['defaultbackend_image'] = config.get('default-backend-image')
    if (context['defaultbackend_image'] == "" or
            context['defaultbackend_image'] == "auto"):
        if context['arch'] == 's390x':
            context['defaultbackend_image'] = \
                "k8s.gcr.io/defaultbackend-s390x:1.4"
        elif context['arch'] == 'arm64':
            context['defaultbackend_image'] = \
                "k8s.gcr.io/defaultbackend-arm64:1.4"
        else:
            context['defaultbackend_image'] = \
                "k8s.gcr.io/defaultbackend:1.4"

    # Render the default http backend (404) replicationcontroller manifest
    manifest = addon_path.format('default-http-backend.yaml')
    render('default-http-backend.yaml', manifest, context)
    hookenv.log('Creating the default http backend.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create default-http-backend. Will attempt again next update.')  # noqa
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    # Render the ingress daemon set controller manifest
    context['ssl_chain_completion'] = config.get(
        'ingress-ssl-chain-completion')
    context['ingress_image'] = config.get('nginx-image')
    if context['ingress_image'] == "" or context['ingress_image'] == "auto":
        images = {'amd64': 'quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.16.1',  # noqa
                  'arm64': 'quay.io/kubernetes-ingress-controller/nginx-ingress-controller-arm64:0.16.1',  # noqa
                  's390x': 'quay.io/kubernetes-ingress-controller/nginx-ingress-controller-s390x:0.16.1',  # noqa
                  'ppc64el': 'quay.io/kubernetes-ingress-controller/nginx-ingress-controller-ppc64le:0.16.1',  # noqa
                  }
        context['ingress_image'] = images.get(context['arch'],
                                              images['amd64'])

    if get_version('kubelet') < (1, 9):
        context['daemonset_api_version'] = 'extensions/v1beta1'
    else:
        context['daemonset_api_version'] = 'apps/v1beta2'
    context['juju_application'] = hookenv.service_name()
    manifest = addon_path.format('ingress-daemon-set.yaml')
    render('ingress-daemon-set.yaml', manifest, context)
    hookenv.log('Creating the ingress daemon set.')
    try:
        kubectl('apply', '-f', manifest)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create ingress controller. Will attempt again next update.')  # noqa
        hookenv.close_port(80)
        hookenv.close_port(443)
        return

    set_state('kubernetes-worker.ingress.available')
    hookenv.open_port(80)
    hookenv.open_port(443)
def main():
    try:
        hooks.execute(sys.argv)
    except UnregisteredHookError as e:
        log('Unknown hook {} - skipping.'.format(e))
    assess_status(CONFIGS)
def request_db(database):
    database.configure('proto', 'admin', 'admin', prefix="proto")
    log("db requested")
def leader_elected():
    log('Unit has been elected leader.', level=DEBUG)
    # When the local unit has been elected the leader, update the cron jobs
    # to ensure that the cron jobs are active on this unit.
    CONFIGS.write(TOKEN_FLUSH_CRON_FILE)
def reinstall_spark():
    """
    This is tricky. We want to fire on config or leadership changes, or when
    hadoop, sparkpeers, or zookeepers come and go. In the future this should
    fire when Cassandra or any other storage comes or goes. We always fire
    this method (or rather, when bigtop is ready and juju has elected a
    master). We then build a deployment-matrix and (re)install as things
    change.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'zookeepers': zks,
        'peers': peers,
    }

    # If neither config nor our matrix is changing, there is nothing to do.
    if not (is_state('config.changed') or
            data_changed('deployment_matrix', deployment_matrix)):
        report_status()
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance',
                       'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(
        deployment_matrix))

    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()

    set_state('spark.started')
    report_status()
def configure_port(website):
    log("configuring website port")
    website.configure(port=hookenv.config('port'))
def render_and_log(template, conf_file, ctx, perms=0o600):
    """Returns True if configuration has been changed."""
    log("Render and store new configuration: " + conf_file)
    try:
        with open(conf_file) as f:
            old_lines = set(f.readlines())
    except Exception:
        old_lines = set()

    render(template, conf_file, ctx, perms=perms)
    with open(conf_file) as f:
        new_lines = set(f.readlines())
    new_set = new_lines.difference(old_lines)
    old_set = old_lines.difference(new_lines)
    if not new_set and not old_set:
        log("Configuration file has not been changed.")
    elif not old_lines:
        log("Configuration file has been created and is not logged.")
    else:
        log("New lines set:\n{new}".format(new="".join(new_set)))
        log("Old lines set:\n{old}".format(old="".join(old_set)))
        log("Configuration file has been changed.")

    return bool(new_set or old_set)
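
# Hedged usage sketch for render_and_log() above; the template name, target
# path and context are invented. The function logs a line-level diff of the
# rendered file and returns whether anything actually changed, which a caller
# can use to decide whether a service restart is needed.
#
# if render_and_log('example.conf.j2', '/etc/example/example.conf', ctx):
#     # configuration changed on disk; restart the affected service here
#     pass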
def send_fqdn():
    spark_master_host = get_fqdn()
    leadership.leader_set({'master-fqdn': spark_master_host})
    hookenv.log("Setting juju leader to {}".format(spark_master_host))
def get_ceph_context(upgrading=False):
    """Returns the current context dictionary for generating ceph.conf

    :param upgrading: bool - determines if the context is invoked as part
                      of an upgrade procedure. Setting this to true causes
                      settings useful during an upgrade to be defined in
                      the ceph.conf file
    """
    mon_hosts = get_mon_hosts()
    log('Monitor hosts are ' + repr(mon_hosts))

    networks = get_networks('ceph-public-network')
    public_network = ', '.join(networks)

    networks = get_networks('ceph-cluster-network')
    cluster_network = ', '.join(networks)

    cephcontext = {
        'auth_supported': get_auth(),
        'mon_hosts': ' '.join(mon_hosts),
        'fsid': get_fsid(),
        'old_auth': cmp_pkgrevno('ceph', "0.51") < 0,
        'crush_initial_weight': config('crush-initial-weight'),
        'osd_journal_size': config('osd-journal-size'),
        'osd_max_backfills': config('osd-max-backfills'),
        'osd_recovery_max_active': config('osd-recovery-max-active'),
        'use_syslog': str(config('use-syslog')).lower(),
        'ceph_public_network': public_network,
        'ceph_cluster_network': cluster_network,
        'loglevel': config('loglevel'),
        'dio': str(config('use-direct-io')).lower(),
        'short_object_len': use_short_objects(),
        'upgrade_in_progress': upgrading,
        'bluestore': config('bluestore'),
        'bluestore_experimental': cmp_pkgrevno('ceph', '12.1.0') < 0,
        'bluestore_block_wal_size': config('bluestore-block-wal-size'),
        'bluestore_block_db_size': config('bluestore-block-db-size'),
    }

    if config('prefer-ipv6'):
        dynamic_ipv6_address = get_ipv6_addr()[0]
        if not public_network:
            cephcontext['public_addr'] = dynamic_ipv6_address
        if not cluster_network:
            cephcontext['cluster_addr'] = dynamic_ipv6_address
    else:
        cephcontext['public_addr'] = get_public_addr()
        cephcontext['cluster_addr'] = get_cluster_addr()

    if config('customize-failure-domain'):
        az = az_info()
        if az:
            cephcontext['crush_location'] = \
                "root=default {} host={}".format(az, socket.gethostname())
        else:
            log("Your Juju environment doesn't have support for "
                "Availability Zones")

    # NOTE(dosaboy): these sections must correspond to what is supported in
    #                the config template.
    sections = ['global', 'osd']
    cephcontext.update(CephConfContext(permitted_sections=sections)())
    return cephcontext
def decode_cert(cert):
    try:
        return base64.b64decode(cert).decode()
    except Exception as e:
        log("Couldn't decode certificate: {}".format(e), level=ERROR)
    return None
    if not vaultlocker.vault_relation_complete():
        status_set('waiting', 'Incomplete relation: vault')
        return

    # Check for OSD device creation parity i.e. at least some devices
    # must have been presented and used for this charm to be operational
    (prev_status, prev_message) = status_get()
    running_osds = ceph.get_running_osds()
    if not prev_message.startswith('Non-pristine'):
        if not running_osds:
            status_set('blocked',
                       'No block devices detected using current '
                       'configuration')
        else:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))


@hooks.hook('update-status')
@harden()
def update_status():
    log('Updating status.')


if __name__ == '__main__':
    try:
        hooks.execute(sys.argv)
    except UnregisteredHookError as e:
        log('Unknown hook {} - skipping.'.format(e))
    assess_status()
def prepare_disks_and_activate():
    # NOTE: vault/vaultlocker preflight check
    vault_kv = vaultlocker.VaultKVContext(vaultlocker.VAULTLOCKER_BACKEND)
    context = vault_kv()
    if use_vaultlocker() and not vault_kv.complete:
        log('Deferring OSD preparation as vault not ready', level=DEBUG)
        return
    elif use_vaultlocker() and vault_kv.complete:
        log('Vault ready, writing vaultlocker configuration', level=DEBUG)
        vaultlocker.write_vaultlocker_conf(context)

    osd_journal = get_journal_devices()
    if not osd_journal.isdisjoint(set(get_devices())):
        raise ValueError('`osd-journal` and `osd-devices` options must not '
                         'overlap.')
    log("got journal devs: {}".format(osd_journal), level=DEBUG)

    # pre-flight check of eligible device pristinity
    devices = get_devices()

    # if a device has been previously touched we need to consider it as
    # non-pristine. If it needs to be re-processed it has to be zapped
    # via the respective action which also clears the unitdata entry.
    db = kv()
    touched_devices = db.get('osd-devices', [])
    devices = [dev for dev in devices if dev not in touched_devices]
    log('Skipping osd devices previously processed by this unit: {}'.format(
        touched_devices))

    # filter osd-devices that are file system paths
    devices = [dev for dev in devices if dev.startswith('/dev')]

    # filter osd-devices that do not exist on this unit
    devices = [dev for dev in devices if os.path.exists(dev)]

    # filter osd-devices that are already mounted
    devices = [dev for dev in devices if not is_device_mounted(dev)]

    # filter osd-devices that are active bluestore devices
    devices = [dev for dev in devices
               if not ceph.is_active_bluestore_device(dev)]

    log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG)
    if not all(ceph.is_pristine_disk(dev) for dev in devices):
        status_set('blocked',
                   'Non-pristine devices detected, consult '
                   '`list-disks`, `zap-disk` and `blacklist-*` actions.')
        return

    if ceph.is_bootstrapped():
        log('ceph bootstrapped, rescanning disks')
        emit_cephconf()
        for dev in get_devices():
            ceph.osdize(dev, config('osd-format'),
                        osd_journal,
                        config('ignore-device-errors'),
                        config('osd-encrypt'),
                        config('bluestore'),
                        config('osd-encrypt-keymanager'))
            # Make it fast!
            if config('autotune'):
                ceph.tune_dev(dev)
        ceph.start_osds(get_devices())
def leader_init_db_if_ready(skip_acl_check=False, skip_cells_restarts=False,
                            db_rid=None, unit=None):
    """Initialise db if leader and db not yet initialised.

    NOTE: must be called from database context.
    """
    if not is_elected_leader(CLUSTER_RES):
        log("Not leader - skipping db init", level=DEBUG)
        return

    if is_db_initialised():
        log("Database already initialised - skipping db init", level=DEBUG)
        return

    # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
    # acl entry has been added. So, if the db supports passing a list of
    # permitted units then check if we're in the list.
    allowed_units = relation_get('nova_allowed_units', rid=db_rid, unit=unit)
    if skip_acl_check or (allowed_units and local_unit() in
                          allowed_units.split()):
        status_set('maintenance', 'Running nova db migration')
        migrate_nova_database()
        log('Triggering remote cloud-compute restarts.')
        [compute_joined(rid=rid, remote_restart=True)
         for rid in relation_ids('cloud-compute')]
        log('Triggering remote neutron-network-service restarts.')
        [quantum_joined(rid=rid, remote_restart=True)
         for rid in relation_ids('quantum-network-service')]
        if not skip_cells_restarts:
            log('Triggering remote cell restarts.')
            [nova_cell_relation_joined(rid=rid, remote_restart=True)
             for rid in relation_ids('cell')]
    else:
        log('allowed_units either not presented, or local unit '
            'not in acl list: %s' % repr(allowed_units))
def check_for_upgrade():
    if not os.path.exists(ceph._upgrade_keyring):
        log("Ceph upgrade keyring not detected, skipping upgrade checks.")
        return

    c = hookenv.config()
    old_version = ceph.resolve_ceph_version(c.previous('source') or 'distro')
    log('old_version: {}'.format(old_version))
    new_version = ceph.resolve_ceph_version(
        hookenv.config('source') or 'distro')
    log('new_version: {}'.format(new_version))

    # A previous upgrade may have failed partway through if the directories
    # still need an ownership update. Check for that condition.
    resuming_upgrade = ceph.dirs_need_ownership_update('osd')

    if old_version == new_version and not resuming_upgrade:
        log("No new ceph version detected, skipping upgrade.", DEBUG)
        return

    if (ceph.UPGRADE_PATHS.get(old_version) == new_version) or \
            resuming_upgrade:
        if old_version == new_version:
            log('Attempting to resume possibly failed upgrade.', INFO)
        else:
            log("{} to {} is a valid upgrade path. Proceeding.".format(
                old_version, new_version))

        emit_cephconf(upgrading=True)
        ceph.roll_osd_cluster(new_version=new_version,
                              upgrade_key='osd-upgrade')
        emit_cephconf(upgrading=False)
    else:
        # Log a helpful error message
        log("Invalid upgrade path from {} to {}. "
            "Valid paths are: {}".format(old_version,
                                         new_version,
                                         ceph.pretty_print_upgrade_paths()),
            level=ERROR)
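# Hedged sketch of the upgrade-path decision above: UPGRADE_PATHS is assumed
# to be a mapping of release -> next supported release (the releases below
# are illustrative, not taken from the charm). An upgrade proceeds only when
# the configured new version is the mapped successor of the old one, or when
# a previously failed run is being resumed.
EXAMPLE_UPGRADE_PATHS = {
    'hammer': 'jewel',
    'jewel': 'luminous',
}


def upgrade_allowed(old_version, new_version, resuming_upgrade=False):
    return (EXAMPLE_UPGRADE_PATHS.get(old_version) == new_version or
            resuming_upgrade)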
def get_pgs(self, pool_size, percent_data=DEFAULT_POOL_WEIGHT):
    """Return the number of placement groups to use when creating the pool.

    Returns the number of placement groups which should be specified when
    creating the pool. This is based upon the calculation guidelines
    provided by the Ceph Placement Group Calculator (located online at
    http://ceph.com/pgcalc/).

    The number of placement groups is calculated using the following:

        (Target PGs per OSD) * (OSD #) * (%Data)
        ----------------------------------------
                     (Pool size)

    Per the upstream guidelines, the OSD # should really be considered
    based on the number of OSDs which are eligible to be selected by the
    pool. Since the pool creation doesn't specify any CRUSH rule set, the
    default rule will be dependent upon the type of pool being created
    (replicated or erasure).

    This code makes no attempt to determine the number of OSDs which can
    be selected for the specific rule; rather, it is left to the user to
    tune this via the 'expected-osd-count' config option.

    :param pool_size: int. pool_size is either the number of replicas for
        replicated pools or the K+M sum for erasure coded pools
    :param percent_data: float. the percentage of data that is expected to
        be contained in the pool for the specific OSD set. Default value
        is to assume 10% of the data is for this pool, which is a
        relatively low % of the data but allows for the pg_num to be
        increased. NOTE: the default is primarily to handle the scenario
        where related charms requiring pools have not been upgraded to
        include an update to indicate their relative usage of the pools.
    :return: int. The number of pgs to use.
    """
    # Note: This calculation follows the approach that is provided
    # by the Ceph PG Calculator located at http://ceph.com/pgcalc/.
    validator(value=pool_size, valid_type=int)

    # Ensure that percent data is set to something - even with a default
    # it can be set to None, which would wreak havoc below.
    if percent_data is None:
        percent_data = DEFAULT_POOL_WEIGHT

    # If the expected-osd-count is specified, then use the max between
    # the expected-osd-count and the actual osd_count
    osd_list = get_osds(self.service)
    expected = config('expected-osd-count') or 0

    if osd_list:
        osd_count = max(expected, len(osd_list))

        # Log a message to provide some insight if the calculations claim
        # to be off because someone is setting the expected count and
        # there are more OSDs in reality. Try to make a proper guess
        # based upon the cluster itself.
        if expected and osd_count != expected:
            log("Found more OSDs than provided expected count. "
                "Using the actual count instead", INFO)
    elif expected:
        # Use the expected-osd-count in older ceph versions to allow for
        # a more accurate pg calculation
        osd_count = expected
    else:
        # NOTE(james-page): Default to 200 for older ceph versions
        # which don't support OSD query from cli
        return LEGACY_PG_COUNT

    percent_data /= 100.0
    target_pgs_per_osd = config('pgs-per-osd') or DEFAULT_PGS_PER_OSD_TARGET
    num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size

    # NOTE: ensure a sane minimum number of PGs otherwise we don't get any
    #       reasonable data distribution in minimal OSD configurations
    if num_pg < DEFAULT_MINIMUM_PGS:
        num_pg = DEFAULT_MINIMUM_PGS

    # The CRUSH algorithm has a slight optimization for placement groups
    # with powers of 2, so find the nearest power of 2. If the nearest
    # power of 2 is more than 25% below the original value, the next
    # highest value is used. To do this, find the nearest power of 2 such
    # that 2^n <= num_pg, and check whether it is within the 25% tolerance.
    exponent = math.floor(math.log(num_pg, 2))
    nearest = 2 ** exponent
    if (num_pg - nearest) > (num_pg * 0.25):
        # Choose the next highest power of 2 since the nearest is more
        # than 25% below the original value.
        return int(nearest * 2)
    else:
        return int(nearest)
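# Worked example of the PG formula above, written standalone so the
# arithmetic can be checked without a cluster. With 12 eligible OSDs,
# 3 replicas, 100 target PGs per OSD and 100% of the data in the pool:
#   (100 * 12 * 1.0) // 3 = 400
# The nearest power of 2 at or below 400 is 256; 400 - 256 = 144, which is
# more than 25% of 400, so the next power of 2 (512) is chosen. All constants
# here are illustrative defaults, not the charm's configuration values.
import math


def example_pg_count(target_pgs_per_osd=100, osd_count=12,
                     percent_data=1.0, pool_size=3, minimum_pgs=2):
    num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size
    num_pg = max(num_pg, minimum_pgs)
    exponent = math.floor(math.log(num_pg, 2))
    nearest = 2 ** exponent
    if (num_pg - nearest) > (num_pg * 0.25):
        return int(nearest * 2)
    return int(nearest)

# example_pg_count() -> 512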
def compute_changed(rid=None, unit=None):
    for r_id in relation_ids('nova-api'):
        nova_api_relation_joined(rid=r_id)

    rel_settings = relation_get(rid=rid, unit=unit)
    if rel_settings.get('region', None) != config('region'):
        relation_set(relation_id=rid, region=config('region'))

    if 'migration_auth_type' not in rel_settings:
        return

    if rel_settings['migration_auth_type'] == 'ssh':
        status_set('maintenance', 'configuring live migration')
        key = rel_settings.get('ssh_public_key')
        if not key:
            log('SSH migration set but peer did not publish key.')
            return

        ssh_compute_add(key, rid=rid, unit=unit)
        index = 0
        for line in ssh_known_hosts_lines(unit=unit):
            relation_set(
                relation_id=rid,
                relation_settings={
                    'known_hosts_{}'.format(index): line})
            index += 1
        relation_set(relation_id=rid, known_hosts_max_index=index)

        index = 0
        for line in ssh_authorized_keys_lines(unit=unit):
            relation_set(
                relation_id=rid,
                relation_settings={
                    'authorized_keys_{}'.format(index): line})
            index += 1
        relation_set(relation_id=rid, authorized_keys_max_index=index)

    if 'nova_ssh_public_key' not in rel_settings:
        return

    if rel_settings['nova_ssh_public_key']:
        ssh_compute_add(rel_settings['nova_ssh_public_key'],
                        rid=rid, unit=unit, user='******')
        index = 0
        for line in ssh_known_hosts_lines(unit=unit, user='******'):
            relation_set(
                relation_id=rid,
                relation_settings={
                    '{}_known_hosts_{}'.format('nova', index): line})
            index += 1
        relation_set(
            relation_id=rid,
            relation_settings={
                '{}_known_hosts_max_index'.format('nova'): index})

        index = 0
        for line in ssh_authorized_keys_lines(unit=unit, user='******'):
            relation_set(
                relation_id=rid,
                relation_settings={
                    '{}_authorized_keys_{}'.format('nova', index): line})
            index += 1
        relation_set(
            relation_id=rid,
            relation_settings={
                '{}_authorized_keys_max_index'.format('nova'): index})
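# Illustrative helper (not part of the charm) showing the pattern used above
# for shipping multi-line data over relation settings: each line gets its own
# numbered key plus a '<prefix>_max_index' marker so the remote side knows
# how many entries to read back. The example input lines are made up.
def indexed_settings(prefix, lines):
    settings = {'{}_{}'.format(prefix, i): line
                for i, line in enumerate(lines)}
    settings['{}_max_index'.format(prefix)] = len(lines)
    return settings

# e.g. indexed_settings('known_hosts', ['host-a ssh-rsa AAAA...',
#                                       'host-b ssh-rsa BBBB...'])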