def hb_fo(self):
    """
    Context-manager generator yielding an opened file object on the
    heartbeat device (self.dev), opened with the flags prepared by
    _configure (may include O_DIRECT on Linux block devices).

    Raises ex.excAbortAction if the device can not be opened; EINVAL is
    reported specifically as a lack of directio support.
    On exit, the device is fsync'ed and closed.
    """
    try:
        # open the raw fd first so directio flags apply, then wrap it
        # in a buffered file object for read/write access
        fd = os.open(self.dev, self.flags)
        fo = os.fdopen(fd, 'rb+')
    except OSError as exc:
        if exc.errno == errno.EINVAL:
            # EINVAL on open with O_DIRECT: the device refuses directio
            raise ex.excAbortAction("%s directio is not supported" % self.dev)
        else:
            raise ex.excAbortAction("error opening %s: %s" % (self.dev, str(exc)))
    except Exception as exc:
        raise ex.excAbortAction("error opening %s: %s" % (self.dev, str(exc)))
    try:
        yield fo
    except Exception as exc:
        # log errors raised in the caller's with-block, don't propagate
        self.log.error("%s: %s", self.dev, exc)
    finally:
        # closing fo also closes fd
        try:
            # push written slot data to the device before closing
            os.fsync(fd)
        except OSError as exc:
            self.duplog("error", "%(exc)s", exc=str(exc), nodename="")
        fo.close()
def get_cmd(self, action, script_arg=None, validate=True):
    """
    Return the command to run for <action>, built from the
    <action>_seq node attribute.

    * if the attribute is an integer or a true boolean, run the
      configured self.script with <script_arg> (or <action>) as argument
    * if it is any other string, interpret it as a command line to split
    * if the command line contains shell control operators, return the
      raw string for shell execution instead of an argv list

    Raises ex.excAbortAction if the attribute is unset or false.
    """
    key = action + "_seq"
    val = getattr(self, key)
    if val in (None, False):
        raise ex.excAbortAction()
    try:
        # integer sequence number: delegate to the standard script
        int(val)
        cmd = [self.script, script_arg if script_arg else action]
    except (TypeError, ValueError):
        try:
            val = convert_boolean(val)
            if val is False:
                raise ex.excAbortAction()
            cmd = [self.script, script_arg if script_arg else action]
        except ex.excAbortAction:
            raise
        except Exception:
            # not an int nor a boolean: treat as a command line
            if six.PY2:
                cmd = [word.decode('utf8') for word in shlex.split(val.encode('utf8'))]
            else:
                cmd = shlex.split(val)
    # shell control operators present: hand the raw string to a shell
    if "|" in cmd or "||" in cmd or "&&" in cmd or \
       any(word.endswith(";") for word in cmd):
        return val
    if validate:
        cmd = self.validate_on_action(cmd)
    return cmd
def validate_cluster_global_expect(self, global_expect):
    """
    Abort the request if the cluster is already in the freeze state
    the caller asks for.
    """
    if global_expect is None:
        return
    frozen_state = shared.DAEMON_STATUS.get("monitor", {}).get("frozen")
    if global_expect == "thawed" and frozen_state == "thawed":
        raise ex.excAbortAction("cluster is already thawed")
    if global_expect == "frozen" and frozen_state == "frozen":
        raise ex.excAbortAction("cluster is already frozen")
def _configure(self):
    """
    Reload this relay heartbeat thread settings (timeout, relay,
    secret) from the node configuration.

    Raises ex.excAbortAction if the mandatory relay or secret options
    are not set.
    """
    if self.name not in shared.NODE.cd:
        # this thread will be stopped. don't reconfigure to avoid logging errors
        return
    self.get_hb_nodes()
    self.peer_config = {}
    self.timeout = shared.NODE.oget(self.name, "timeout")
    try:
        self.relay = shared.NODE.oget(self.name, "relay")
    except Exception:
        # fixed garbled double-negative message ("no %s.relay is not set")
        raise ex.excAbortAction("%s.relay is not set in node.conf" % self.name)
    try:
        self.secret = shared.NODE.oget(self.name, "secret")
    except Exception:
        raise ex.excAbortAction("%s.secret is not set in node.conf" % self.name)
def validate_global_expect(self, path, global_expect, thr=None):
    """
    Sanity-check a requested object target state before accepting it.

    * "frozen", "aborted" and "provisioned" are always accepted
    * wait up to 5 seconds for the object instances to appear
    * refuse the request if an instance is busy in a non-idle,
      non-failed, non-wait state
    * abort if every instance already targets the requested state
    * abort "started"/"stopped" requests already satisfied by the
      aggregated availability status
    """
    if global_expect is None:
        return
    if global_expect in ("frozen", "aborted", "provisioned"):
        # allow provision target state on just-created service
        return

    # wait for object to appear
    for _ in range(5):
        instances = thr.get_service_instances(path)
        if instances:
            break
        if not is_service(path):
            break
        time.sleep(1)
    if not instances:
        raise ex.excError("object does not exist")

    seen_targets = set()
    for nodename, idata in instances.items():
        smon = idata.get("monitor", {})
        target = smon.get("global_expect")
        seen_targets.add(target)
        if target == global_expect:
            continue
        status = smon.get("status", "unknown")
        if status == "tocing" and global_expect == "placed":
            # Allow the "toc" action with the "switch" monitor_action
            # to change status from "tocing" to "start failed".
            continue
        if status != "idle" and "failed" not in status and "wait" not in status:
            raise ex.excError("%s instance on node %s in %s state"
                              "" % (path, nodename, status))
    if seen_targets == set([global_expect]):
        raise ex.excAbortAction("%s is already targeting %s" % (path, global_expect))

    if global_expect not in ("started", "stopped"):
        return
    agg = Storage(shared.AGG.get(path, {}))
    if global_expect == "started" and agg.avail == "up":
        raise ex.excAbortAction("%s is already started" % path)
    if global_expect == "stopped" and agg.avail in ("down", "stdby down", "stdby up"):
        raise ex.excAbortAction("%s is already stopped" % path)
    if agg.avail in ("n/a", "undef"):
        raise ex.excAbortAction()
def _configure(self):
    """
    Reload this disk heartbeat thread settings from the node
    configuration: allocate the slot buffers, resolve and validate the
    heartbeat device, and choose the open flags (directio on Linux
    block devices).

    Raises ex.excAbortAction if the device option is unset, the device
    does not exist, or is not of the expected type.
    """
    if self.name not in shared.NODE.cd:
        # this thread will be stopped. don't reconfigure to avoid logging errors
        return
    self.get_hb_nodes()
    self.peer_config = {}
    # anonymous mmaps reused across reconfigurations as aligned
    # staging buffers for device reads/writes
    if not hasattr(self, "meta_slot_buff"):
        self.meta_slot_buff = mmap.mmap(-1, 2 * mmap.PAGESIZE)
    if not hasattr(self, "slot_buff"):
        self.slot_buff = mmap.mmap(-1, self.SLOTSIZE)
    self.timeout = shared.NODE.oget(self.name, "timeout")
    try:
        new_dev = shared.NODE.oget(self.name, "dev")
    except ex.RequiredOptNotFound:
        # fixed garbled double-negative message ("no %s.dev is not set")
        raise ex.excAbortAction("%s.dev is not set in node.conf" % self.name)
    if not os.path.exists(new_dev):
        raise ex.excAbortAction("%s does not exist" % new_dev)
    new_dev = os.path.realpath(new_dev)
    new_flags = os.O_RDWR
    statinfo = os.stat(new_dev)
    if rcEnv.sysname == "Linux":
        if stat.S_ISBLK(statinfo.st_mode):
            self.log.info("using directio")
            # (Darwin, SunOS) pylint: disable=no-member
            new_flags |= os.O_DIRECT | os.O_SYNC | os.O_DSYNC
        else:
            raise ex.excAbortAction("%s must be a block device" % new_dev)
    else:
        if not stat.S_ISCHR(statinfo.st_mode):
            raise ex.excAbortAction("%s must be a char device" % new_dev)
    if new_dev != self.dev:
        # device changed: reset peer slot mapping and remember the
        # new device and open flags
        self.dev = new_dev
        self.flags = new_flags
        self.peer_config = {}
        self.log.info("set dev=%s", self.dev)
    with self.hb_fo() as fo:
        self.load_peer_config(fo=fo)
def write_slot(self, slot, data, fo=None):
    """
    Stage <data> at the head of the preallocated slot buffer and write
    the whole fixed-size buffer to the device at the slot's offset.

    Raises ex.excAbortAction when <data> exceeds SLOTSIZE.
    """
    if len(data) > self.SLOTSIZE:
        self.log.error("attempt to write too long data in slot %d", slot)
        raise ex.excAbortAction()
    # copy the payload into the mmap'ed staging buffer
    self.slot_buff.seek(0)
    self.slot_buff.write(data)
    # always write the full SLOTSIZE buffer so each slot update is a
    # whole, aligned chunk (presumably required by directio devices —
    # see the O_DIRECT flags set in _configure)
    fo.seek(self.slot_offset(slot), os.SEEK_SET)
    fo.write(self.slot_buff)
    fo.flush()
def validate_destination_node(self, path, global_expect, thr=None):
    """
    For a placed@<dst> <global_expect> (move action) on <path>,

    Raise an excError if

    * the object <path> does not exist
    * the object <path> topology is failover and more than 1
      destination node was specified
    * the specified destination is not a object candidate node
    * no destination node specified
    * an empty destination node is specified in a list of destination
      nodes

    Raise an excAbortAction if

    * the avail status of the instance on the destination node is up
    """
    if global_expect is None:
        return
    try:
        global_expect, destination_nodes = global_expect.split("@", 1)
    except ValueError:
        # no "@<dst>" suffix: nothing to validate here
        return
    if global_expect != "placed":
        return
    instances = thr.get_service_instances(path)
    if not instances:
        raise ex.excError("object does not exist")
    if destination_nodes == "<peer>":
        instance = list(instances.values())[0]
        if instance.get("topology") == "flex":
            raise ex.excError("no destination node specified")
        else:
            # candidate nodes: instance not already up/warn/n-a and
            # monitor not already started
            nodes = [node for node, inst in instances.items() \
                     if inst.get("avail") not in ("up", "warn", "n/a") and \
                     inst.get("monitor", {}).get("status") != "started"]
            count = len(nodes)
            if count == 0:
                raise ex.excError("no candidate destination node")
            svc = thr.get_service(path)
            # pick the best-ranked candidate as the explicit destination
            return "placed@%s" % thr.placement_ranks(svc, nodes)[0]
    else:
        destination_nodes = destination_nodes.split(",")
        count = len(destination_nodes)
        if count == 0:
            raise ex.excError("no destination node specified")
        instance = list(instances.values())[0]
        if count > 1 and instance.get("topology") == "failover":
            raise ex.excError(
                "only one destination node can be specified for "
                "a failover service")
        for destination_node in destination_nodes:
            if not destination_node:
                raise ex.excError("empty destination node")
            if destination_node not in instances:
                raise ex.excError("destination node %s has no %s instance" % \
                                  (destination_node, path))
            instance = instances[destination_node]
            if instance["avail"] == "up":
                # fixed: string literal was broken across two physical
                # lines, leaving the file syntactically invalid
                raise ex.excAbortAction(
                    "instance on destination node %s is "
                    "already up" % destination_node)