def monitor(self, w, argv, cpids, agents, slave_vol,
            slave_host, master, suuid, slavenodes):
    """the monitor loop

    The basic logic is a blatantly simple, blunt heuristic: if the
    spawned client survives 60 secs, it's considered OK. This serves
    us pretty well as it's not vulnerable to any kind of irregular
    behavior of the child...

    ... well, except for one: if the child is hung up waiting for some
    event, it can survive aeons yet still be defunct. So we tweak the
    above logic to expect the worker to send us a signal within 60 secs
    (in the form of closing its end of a pipe). The worker does this
    when it's done with the setup stage and ready to enter the service
    loop (note it's the setup stage which is vulnerable to hangs -- the
    full-blown worker blows up on EPIPE if the net goes down, due to
    the keep-alive thread).
    """
    if not self.status.get(w[0]['dir'], None):
        self.status[w[0]['dir']] = GeorepStatus(gconf.get("state-file"),
                                                w[0]['host'],
                                                w[0]['dir'],
                                                w[0]['uuid'],
                                                master,
                                                "%s::%s" % (slave_host,
                                                            slave_vol))
    ret = 0

    def nwait(p, o=0):
        try:
            p2, r = waitpid(p, o)
            if not p2:
                return
            return r
        except OSError as e:
            # no child process; this happens if the child has
            # already died and been cleaned up
            if e.errno == ECHILD:
                return -1
            else:
                raise

    def exit_signalled(s):
        """child terminated due to receipt of SIGUSR1"""
        return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))

    def exit_status(s):
        if os.WIFEXITED(s):
            return os.WEXITSTATUS(s)
        return 1

    conn_timeout = gconf.get("connection-timeout")
    while ret in (0, 1):
        remote_user, remote_host = w[1][0].split("@")
        remote_id = w[1][1]
        # Check the status of the connected slave node. If it is down,
        # try to connect to a different node that is up.
        current_slave_host = remote_host
        slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port"))

        if (current_slave_host, remote_id) not in slave_up_hosts:
            if len(slave_up_hosts) > 0:
                remote_new = random.choice(slave_up_hosts)
                remote_host = "%s@%s" % (remote_user, remote_new[0])
                remote_id = remote_new[1]

        # Spawn the worker and agent under the lock to avoid fd leaks
        self.lock.acquire()

        self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
        logging.info(lf('starting gsyncd worker',
                        brick=w[0]['dir'],
                        slave_node=remote_host))

        # A couple of pipe pairs for RPC communication b/w the
        # worker and the changelog agent.
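        # The four descriptors form two one-way channels: the agent
        # reads on `ra` what the worker writes via `ww`, and the worker
        # reads on `rw` what the agent writes via `wa`. The fd numbers
        # are handed to both children through --rpc-fd; each child
        # closes the ends it does not use before exec'ing, and the
        # monitor closes all four copies once both have been spawned.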
        # read/write end for agent
        (ra, ww) = os.pipe()
        # read/write end for worker
        (rw, wa) = os.pipe()

        # spawn the agent process
        apid = os.fork()
        if apid == 0:
            os.close(rw)
            os.close(ww)

            args_to_agent = argv + [
                'agent',
                rconf.args.master,
                rconf.args.slave,
                '--local-path', w[0]['dir'],
                '--local-node', w[0]['host'],
                '--local-node-id', w[0]['uuid'],
                '--slave-id', suuid,
                '--rpc-fd', ','.join([str(ra), str(wa), str(rw), str(ww)])
            ]

            if rconf.args.config_file is not None:
                args_to_agent += ['-c', rconf.args.config_file]

            if rconf.args.debug:
                args_to_agent.append("--debug")

            os.execv(sys.executable, args_to_agent)

        pr, pw = os.pipe()
        cpid = os.fork()
        if cpid == 0:
            os.close(pr)
            os.close(ra)
            os.close(wa)

            args_to_worker = argv + [
                'worker',
                rconf.args.master,
                rconf.args.slave,
                '--feedback-fd', str(pw),
                '--local-path', w[0]['dir'],
                '--local-node', w[0]['host'],
                '--local-node-id', w[0]['uuid'],
                '--slave-id', suuid,
                '--rpc-fd', ','.join([str(rw), str(ww), str(ra), str(wa)]),
                '--subvol-num', str(w[2]),
                '--resource-remote', remote_host,
                '--resource-remote-id', remote_id
            ]

            if rconf.args.config_file is not None:
                args_to_worker += ['-c', rconf.args.config_file]

            if w[3]:
                args_to_worker.append("--is-hottier")

            if rconf.args.debug:
                args_to_worker.append("--debug")

            access_mount = gconf.get("access-mount")
            if access_mount:
                os.execv(sys.executable, args_to_worker)
            else:
                if unshare_propagation_supported():
                    logging.debug("Worker would mount volume privately")
                    unshare_cmd = ['unshare', '-m', '--propagation',
                                   'private']
                    cmd = unshare_cmd + args_to_worker
                    os.execvp("unshare", cmd)
                else:
                    logging.debug("Mount is not private. It would be lazy"
                                  " umounted")
                    os.execv(sys.executable, args_to_worker)

        cpids.add(cpid)
        agents.add(apid)
        os.close(pw)

        # close all RPC pipes in the monitor
        os.close(ra)
        os.close(wa)
        os.close(rw)
        os.close(ww)
        self.lock.release()

        t0 = time.time()
        so = select((pr,), (), (), conn_timeout)[0]
        os.close(pr)

        if so:
            ret = nwait(cpid, os.WNOHANG)
            ret_agent = nwait(apid, os.WNOHANG)

            if ret_agent is not None:
                # the agent has died; kill the worker as well
                logging.info(lf("Changelog Agent died, Aborting Worker",
                                brick=w[0]['dir']))
                errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                nwait(cpid)
                nwait(apid)

            if ret is not None:
                logging.info(lf("worker died before establishing "
                                "connection",
                                brick=w[0]['dir']))
                nwait(apid)  # wait for agent
            else:
                logging.debug("worker(%s) connected" % w[0]['dir'])
                while time.time() < t0 + conn_timeout:
                    ret = nwait(cpid, os.WNOHANG)
                    ret_agent = nwait(apid, os.WNOHANG)

                    if ret is not None:
                        logging.info(lf("worker died in startup phase",
                                        brick=w[0]['dir']))
                        nwait(apid)  # wait for agent
                        break

                    if ret_agent is not None:
                        # the agent has died; kill the worker as well
                        logging.info(lf("Changelog Agent died, Aborting "
                                        "Worker",
                                        brick=w[0]['dir']))
                        errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                        nwait(cpid)
                        nwait(apid)
                        break

                    time.sleep(1)
        else:
            logging.info(
                lf("Worker not confirmed after wait, aborting it. "
                   "Gsyncd invocation on remote slave via SSH or "
                   "gluster master mount might have hung. Please "
                   "check the above logs for exact issue and check "
                   "master or slave volume for errors. Restarting "
                   "master/slave volume accordingly might help.",
                   brick=w[0]['dir'],
                   timeout=conn_timeout))
            errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
            nwait(apid)  # wait for agent

        ret = nwait(cpid)
        if ret is None:
            # If the worker dies, the agent terminates on EOF.
            # So let's wait for the agent first.
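            # (EOF is guaranteed at this point: the monitor has already
            # closed its copies of the RPC pipe fds, so the worker held
            # the only remaining write end of the agent's read pipe.)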
            nwait(apid)
            ret = nwait(cpid)

        if exit_signalled(ret):
            ret = 0
        else:
            ret = exit_status(ret)

        if ret in (0, 1):
            self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
            gf_event(EVENT_GEOREP_FAULTY,
                     master_volume=master.volume,
                     master_node=w[0]['host'],
                     master_node_id=w[0]['uuid'],
                     slave_host=slave_host,
                     slave_volume=slave_vol,
                     current_slave_host=current_slave_host,
                     brick_path=w[0]['dir'])

        time.sleep(10)

    self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
    return ret
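# ---------------------------------------------------------------------------
# Illustrative sketch only -- not part of gsyncd. It reduces the readiness
# handshake used by monitor() above to its essentials: the parent select()s
# on the read end of a pipe and treats the child's closing of the write end
# (here, by exiting once setup is done) as the "I'm ready" signal, killing
# the child if no confirmation arrives within the timeout. Every name below
# is hypothetical and exists purely for demonstration.
# ---------------------------------------------------------------------------
def _readiness_handshake_sketch(setup_fn, timeout=60):
    """Run setup_fn in a child; return True iff it confirmed in time."""
    import os
    import signal
    import select as stdlib_select

    pr, pw = os.pipe()
    pid = os.fork()
    if pid == 0:
        # child: keep only the write end and close it when setup is done
        os.close(pr)
        try:
            setup_fn()
        finally:
            os.close(pw)        # the "signal" the parent is waiting for
            os._exit(0)

    # parent: drop its write end so EOF can be observed, then wait
    os.close(pw)
    readable, _, _ = stdlib_select.select([pr], [], [], timeout)
    os.close(pr)
    if not readable:
        # no confirmation within the timeout: presume the child is hung
        os.kill(pid, signal.SIGKILL)
    os.waitpid(pid, 0)
    return bool(readable)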