def register_server(url, ca, secret, address=None):
    """
    Register this agent with the manager and (re)start the chroma-agent service.

    The sequence is: stop the service if it is currently running, wipe any
    existing crypto material, install the supplied authority, register with
    the manager, install the certificate found under the 'certificate' key of
    the registration result, persist the manager url in the settings, then
    enable and start the service.

    :param url: manager base url; "register/<secret>/" is appended to it
    :param ca: authority handed to Crypto.install_authority
    :param secret: registration secret embedded in the registration url
    :param address: optional address forwarded to AgentClient.register
    :return: the result returned by AgentClient.register
    """
    if _service_is_running() is True:
        console_log.warning(
            "chroma-agent service was running before registration, stopping.")
        agent_service.stop()

    agent_crypto = Crypto(config.path)

    # A previous configuration may not have been cleaned up properly, so
    # always start from an empty crypto store before installing the authority.
    agent_crypto.delete()
    agent_crypto.install_authority(ca)

    registration_url = url + "register/%s/" % secret
    client = AgentClient(
        registration_url,
        ActionPluginManager(),
        DevicePluginManager(),
        ServerProperties(),
        agent_crypto,
    )

    outcome = client.register(address)
    agent_crypto.install_certificate(outcome["certificate"])

    config.set("settings", "server", {"url": url})

    console_log.info("Enabling chroma-agent service")
    agent_service.enable()

    console_log.info("Starting chroma-agent service")
    agent_service.start()

    return outcome
def unconfigure_corosync2(host_fqdn, mcast_port):
    """
    Unconfigure the corosync application (corosync2 flavour).

    pcsd is left alone; the host node is removed from the cluster and corosync
    is disabled from auto starting (the service should already have been
    stopped by the state transition). The pcs cluster commands take care of
    editing and removing the corosync.conf file.

    :param host_fqdn: name of the node to remove from the cluster
    :param mcast_port: corosync udp port whose firewall rule is removed
    Return: Value using simple return protocol
    """
    disable_error = corosync_service.disable()
    if disable_error:
        return agent_error(disable_error)

    # Membership has to be sampled before the node removal below, because
    # that command may remove the configuration file.
    members = _nodes_in_cluster()

    removal = AgentShell.run(
        ["pcs", "--force", "cluster", "node", "remove", host_fqdn])

    if removal.rc != 0:
        stderr = removal.stderr

        if "No such file or directory" in stderr:
            # A missing configuration file still counts as success.
            console_log.warning(stderr)
        elif "Error: Unable to update any nodes" not in stderr:
            # Any other failure is a genuine error.
            return agent_error(stderr)
        elif len(members) != 1:
            # "Unable to update any nodes" is only expected when this was the
            # last node in the cluster.
            return agent_error(stderr)

    # Remove the pcs rule first; the corosync rule is only attempted when the
    # first removal reported no error.
    firewall_error = firewall_control.remove_rule(
        PCS_TCP_PORT, "tcp", "pcs", persist=True)
    if not firewall_error:
        firewall_error = firewall_control.remove_rule(
            mcast_port, "udp", "corosync", persist=True)

    return agent_ok_or_error(firewall_error)
def reregister_server(url, address):
    """
    Update manager url and register agent address with manager.

    The chroma-agent service is stopped first if it is running, the new url is
    stored in the settings, and the agent's address and fqdn are posted to the
    manager's "reregister/" endpoint. The service is started again only after
    a successful post.

    :param url: manager base url; "reregister/" is appended to it
    :param address: agent address reported to the manager
    :return: the result of the post to the manager
    :raises HttpError: re-raised after logging when the post fails
    """
    if _service_is_running() is True:
        console_log.warning(
            "chroma-agent service was running before registration, stopping.")
        agent_service.stop()

    config.set("settings", "server", {"url": url})

    agent_crypto = Crypto(config.path)
    client = AgentClient(
        url + "reregister/",
        ActionPluginManager(),
        DevicePluginManager(),
        ServerProperties(),
        agent_crypto,
    )

    payload = {"address": address, "fqdn": client._fqdn}

    try:
        response = client.post(payload)
    except HttpError:
        console_log.error("Reregistration failed to %s with request %s" %
                          (client.url, payload))
        raise

    console_log.info("Starting chroma-agent service")
    agent_service.start()

    return response
def reregister_server(url, address):
    """
    Update manager url and register agent address with manager.

    Stops the chroma-agent service if it is running, records the new manager
    url through conf.set_server_url, then posts the agent's address and fqdn
    to the manager's "reregister/" endpoint. The service is only started again
    once the post has succeeded.

    :param url: manager base url; "reregister/" is appended to it
    :param address: agent address reported to the manager
    :return: the result of the post to the manager
    :raises HttpError: re-raised after logging when the post fails
    """
    if _service_is_running() is True:
        console_log.warning(
            "chroma-agent service was running before registration, stopping.")
        agent_service.stop()

    conf.set_server_url(url)

    agent_crypto = Crypto(conf.ENV_PATH)
    client = AgentClient(
        url + "reregister/",
        ActionPluginManager(),
        DevicePluginManager(),
        ServerProperties(),
        agent_crypto,
    )

    payload = {"address": address, "fqdn": client._fqdn}

    try:
        response = client.post(payload)
    except HttpError:
        console_log.error("Reregistration failed to %s with request %s" %
                          (client.url, payload))
        raise
    else:
        console_log.info("Starting chroma-agent service")
        agent_service.start()

    return response
def target_running(uuid):
    """
    Report via the process exit status whether a target is mounted locally.

    This is called by the Target RA from corosync. The function never returns
    normally: it calls os._exit(0) when a local mount matches both the
    target's mount point and one of its devices, and os._exit(1) otherwise
    (including when the target's configuration cannot be read).

    :param uuid: identifier of the target to look up
    """
    from os import _exit

    try:
        target = _get_target_config(uuid)
    except (KeyError, TypeError) as err:
        # With no config entry (or no store at all) the target cannot
        # possibly be running on this host.
        console_log.warning("Exception getting target config: %s", err)
        _exit(1)

    filesystem = FileSystem(target["backfstype"], target["bdev"])

    for devices, mount_point, _ in get_local_mounts():
        if mount_point != target["mntpt"]:
            continue

        # Each mount entry carries several candidate devices; one match is enough.
        if any(
            filesystem.devices_match(device, target["bdev"], uuid)
            for device in devices
        ):
            _exit(0)

    console_log.warning(
        "Did not find mount with matching mntpt and device for %s", uuid)
    _exit(1)
def get_path(major_minor, device_name):
    """
    Find a device node path for the given device.

    Lookup order: the /dev/mapper table, then /dev/disk/by-id, then
    /dev/disk/by-path, and finally the plain /dev/<device_name> node if it
    exists on disk.

    :param major_minor: key used to look the device up in the node tables
    :param device_name: device name used to build the /dev fallback path
    :return: the first path found, or None (after logging a warning) when
             no device node could be located
    """
    fallback_dev_path = os.path.join("/dev/", device_name)

    # Prefer /dev/mapper
    if major_minor in mapper_devs:
        return mapper_devs[major_minor]

    # ... then /dev/disk/by-id
    if major_minor in by_id_nodes:
        return by_id_nodes[major_minor]

    # ... then /dev/disk/by-path
    if major_minor in by_path_nodes:
        return by_path_nodes[major_minor]

    # ... and as a last resort the bare /dev node
    if os.path.exists(fallback_dev_path):
        return fallback_dev_path

    console_log.warning("Could not find device node for %s (%s)" %
                        (major_minor, fallback_dev_path))
    return None
def target_running(uuid):
    """
    Report via the process exit status whether a target is mounted locally.

    The function never returns normally: it calls os._exit(0) when a local
    mount matches both the target's mount point and its device, and
    os._exit(1) otherwise (including when the target's configuration cannot
    be read).

    :param uuid: identifier of the target to look up
    """
    from os import _exit

    try:
        target = _get_target_config(uuid)
    except (KeyError, TypeError) as err:
        # With no config entry (or no store at all) the target cannot
        # possibly be running on this host.
        console_log.warning("Exception getting target config: '%s'" % err)
        _exit(1)

    filesystem = FileSystem(target["backfstype"], target["bdev"])

    for device, mount_point, _ in get_local_mounts():
        if mount_point != target["mntpt"]:
            continue
        if filesystem.devices_match(device, target["bdev"], uuid):
            _exit(0)

    console_log.warning(
        "Did not find mount with matching mntpt and device for %s" % uuid)
    _exit(1)
def _scan_mounts(self):
    """
    Collect the lustre mounts reported by the device scanner.

    For every mount found, the recovery status is read with
    'lctl get_param -n *.<fs_label>.recovery_status' and parsed into a dict
    of 'key: value' lines; if that fails the recovery info is left empty.

    :return: on success, the values of a per-device mapping, each a dict with
             the keys 'fs_uuid', 'mount_point' and 'recovery_status';
             on any error a warning is logged and an empty dict is returned
    """
    try:
        (kind, dev_tree) = scanner_cmd("Stream").items().pop()

        lustre_mounts = []
        get_lustre_mount_info(kind, dev_tree, lustre_mounts)

        by_device = {}
        for mount, fs_uuid, fs_label in lustre_mounts:
            source_device = mount.get("source")
            mount_point = mount.get("target")

            recovery = {}
            try:
                output = AgentShell.try_run(
                    ["lctl", "get_param", "-n", "*.%s.recovery_status" % fs_label]
                )

                for line in output.split("\n"):
                    # Only lines of the exact form "key: value" are of interest.
                    if line.count(":") != 1:
                        continue

                    key, _, value = line.partition(":")
                    recovery[key.strip()] = value.strip()
            except Exception:
                # The recovery_status file may not exist; in that case the
                # recovery info stays as collected so far (normally empty).
                pass

            by_device[source_device] = {
                "fs_uuid": fs_uuid,
                "mount_point": mount_point,
                "recovery_status": recovery,
            }

        return by_device.values()
    except Exception as e:
        console_log.warning("Error scanning mounts: {}".format(e))
        return {}
def _nodes_in_cluster():
    """
    Returns the nodes in the corosync cluster

    The node names are parsed from the output of 'pcs status nodes corosync',
    which looks like:

    > Corosync Nodes:
    >  Online:
    >  Offline: bill.bailey.com bob.marley.com

    A failing command is logged and treated as "no nodes", so callers can
    always carry on.

    :return: a list of all nodes in cluster
    """
    status = AgentShell.run(["pcs", "status", "nodes", "corosync"])

    if status.rc != 0:
        # Log the error but never fail: callers continue regardless.
        console_log.warning(status.stderr)
        return []

    found = []
    for line in status.stdout.split("\n"):
        # Skip lines without a ':' separator (or with one in first position).
        if line.find(":") <= 0:
            continue

        # Node names are the whitespace separated words right of the ':'.
        found.extend(line.split(":")[1].split())

    return found