def spin(self, data):

    if self.terminate:
        raise Aborted('terminating')

    while len(self.pending) > 0:

        out = None
        msg = self.pending.popleft()
        try:

            #
            # - run the specified closure
            # - assign the latch to whatever is returned
            #
            out = msg['function'](data.zk)

        except Exception as failure:

            #
            # - in case of exception simply pass it upwards via the latch
            # - this will allow for finer-grained error handling
            #
            out = failure

        msg['latch'].set(out)

    return 'spin', data, 0.25
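
#
# - illustrative sketch (not part of the original class) of how a closure would be queued onto
#   self.pending together with its latch, matching the message layout spin() pops off the deque
# - the schedule() helper and its blocking behavior are assumptions, pykka's ThreadingFuture is real
#
def schedule(self, snippet):

    #
    # - wrap the closure and a fresh latch into a message
    # - append it so that spin() picks it up on its next pass
    # - block until spin() sets the latch and re-raise any trapped exception
    #
    latch = ThreadingFuture()
    self.pending.append({'function': snippet, 'latch': latch})
    out = latch.get()
    if isinstance(out, Exception):
        raise out
    return out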
def spin(self, data):

    #
    # - if the termination trigger is set, abort immediately
    #
    if self.force_reset or self.terminate:
        raise Aborted('resetting')

    #
    # - attempt to fetch the lock
    # - allocate it if not already done
    # - it is *important* to allocate just one lock as there is a leak in kazoo
    #
    if not hasattr(data, 'lock'):
        data.lock = data.zk.Lock('%s/coordinator' % self.prefix)

    try:

        #
        # - attempt to lock within a 5 second timeout to avoid stalling in some cases
        #
        if data.lock.acquire(timeout=5.0 * SAMPLING):
            return 'start_controller', data, 0

    except LockTimeout:
        pass

    return 'spin', data, 0
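
#
# - standalone sketch of the kazoo lock recipe used above, assuming a connected KazooClient is
#   passed in as zk (the try_lead() helper is made up)
# - acquire() returns True on success and raises LockTimeout when a timeout is given and the
#   lock could not be obtained in time
#
from kazoo.exceptions import LockTimeout

def try_lead(zk, prefix):

    lock = zk.Lock('%s/coordinator' % prefix)
    try:

        #
        # - a 5 second cap avoids stalling forever on a contended lock
        #
        return lock.acquire(timeout=5.0)

    except LockTimeout:
        return False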
def kill(self, data):

    #
    # - the /kill request will first guarantee we terminate the process
    #
    if data.sub:
        raise Aborted('resetting to terminate pid %s' % data.sub.pid)

    try:

        #
        # - invoke the optional finalize() callback
        #
        logger.info('%s : finalizing pod' % self.path)
        self.finalize()

    except Exception as failure:

        #
        # - log something if finalize() failed for some reason, as we can't really recover
        # - don't bother responding with a 406
        #
        logger.warning('%s : failed to finalize -> %s' % (self.path, diagnostic(failure)))

    #
    # - in any case request a termination and tag the pod as 'dead'
    #
    reply = {}, 200
    self.terminate = 1
    self.hints['process'] = 'dead'
    data.latch.set(reply)
    self.commands.popleft()
    return 'spin', data, 0
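
#
# - hypothetical finalize() override illustrating the optional callback invoked by kill() above
# - typical use is to release external resources before going down (the unmount below is made up,
#   any exception is trapped and logged by kill())
#
def finalize(self):

    import subprocess
    subprocess.call(['umount', '/mnt/data'])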
def wait_for_cnx(self, data):

    if self.terminate:
        raise Aborted('terminating')

    if not self.connected:
        return 'wait_for_cnx', data, 1.0

    return 'spin', data, 0
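
#
# - sketch of the driver side that flips self.connected, assuming a kazoo state listener
# - the actor handle below is an assumption, KazooState and add_listener() are the real kazoo API
#
from kazoo.protocol.states import KazooState

def register_listener(zk, actor):

    def _listener(state):

        #
        # - the listener fires from the kazoo event thread
        # - record whether we are CONNECTED or not (SUSPENDED/LOST unset the flag)
        #
        actor.connected = (state == KazooState.CONNECTED)

    zk.add_listener(_listener)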
def wait_for_cnx(self, data):

    if self.force_reset or self.terminate:
        raise Aborted('resetting')

    #
    # - loop back if we haven't received a CONNECTED event from the driver
    #
    if not self.connected:
        return 'wait_for_cnx', data, SAMPLING

    #
    # - the /pods node holds all our ephemeral per-container data (one container == one child node)
    # - the /hash node stores the last recorded md5 hash (local pods + dependencies), which we use to
    #   flag any change amongst the pods or their dependencies
    #
    data.zk.ensure_path('%s/pods' % self.prefix)
    data.zk.ensure_path('%s/hash' % self.prefix)
    try:

        #
        # - register ourselves by creating an ephemeral node
        # - this is where we can store arbitrary information (e.g our breadcrumbs)
        # - we ask for a sequence counter as well which we then keep (e.g in case of connection loss or reset
        #   we guarantee the pod won't get assigned a new index)
        # - this is *critical* for some use-cases (e.g Kafka where the broker index must remain the same)
        #
        path = data.zk.create('%s/pods/%s.' % (self.prefix, self.id), ephemeral=True, sequence=True)
        tokens = path.split('.')
        if self.seq is None:
            self.seq = int(tokens[-1])
        self.breadcrumbs['seq'] = self.seq
        js = json.dumps(self.breadcrumbs)
        data.zk.set(path, js)

    except NodeExistsError:

        #
        # - if the node is already there we just recovered from a zookeeper connection loss
        #   and /snapshot has not been phased out yet .. this is not an issue, simply pause a bit
        #   and re-attempt later
        #
        logger.debug('%s : pod %s is already there (probably a zk reconnect)' % (self.path, self.id))
        return 'wait_for_cnx', data, 5.0 * SAMPLING

    logger.debug('%s : registered as %s (#%d)' % (self.path, self.id, self.seq))
    data.connected_at = time.time()
    return 'spin', data, 0
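
#
# - tiny illustration of how the sequence counter is derived from the ephemeral node path
# - zookeeper appends a zero-padded 10 digit counter when sequence=True, so the path returned
#   by create() could end in '.0000000042' which maps to pod index #42 (the sample path below
#   is made up)
#
def index_from(path):

    tokens = path.split('.')
    return int(tokens[-1])

assert index_from('/ochopod/clusters/dev.web/pods/4f2a.0000000042') == 42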
def off(self, data):

    #
    # - the /stop request does basically nothing
    # - it only guarantees we terminate the process
    #
    if data.forked:
        raise Aborted('resetting to terminate pid %s' % data.forked.pid)

    data.latch.set(200)
    self.commands.popleft()
    return 'spin', data, 0
def spin(self, data):

    #
    # - if the termination trigger is set, abort immediately
    #
    if self.force_reset or self.terminate:
        raise Aborted('resetting')

    #
    # - attempt to fetch the lock
    #
    lock = data.zk.Lock('%s/coordinator' % self.prefix)
    try:

        #
        # - the kazoo lock recipe seems to be sensitive to being switched to SUSPENDED .. in order to
        #   avoid stalling on the lock (which is the default behavior), attempt to lock multiple times
        #   with a short timeout (e.g spin-lock)
        #
        if hasattr(data, 'lock') and data.lock:
            try:
                data.lock.release()
            except ConnectionClosedError:
                pass
            data.lock = None

        lock.acquire(timeout=SAMPLING)
        logger.debug('%s : lock acquired @ %s, now leading' % (self.path, self.prefix))
        data.lock = lock

        #
        # - we have the lock (e.g we are the leader)
        # - start the controller actor
        #
        data.latch = ThreadingFuture()
        data.controller = self.model.start(data.zk, self.hints, self.scope, self.tag, self.port, data.latch)
        return 'lock', data, 0

    except LockTimeout:
        pass

    #
    # - we could not obtain the lock
    # - blindly loop back and attempt to get it again
    #
    return 'spin', data, 0
def lock(self, data):

    #
    # - if the termination trigger is set, abort immediately
    #
    if self.force_reset or self.terminate:
        raise Aborted('resetting')

    #
    # - spin-lock on the controller latch
    # - any catastrophic plug failure will be trapped that way
    #
    try:
        out = data.latch.get(SAMPLING)
        if isinstance(out, Exception):
            raise out

    except Timeout:
        pass

    return 'lock', data, 0
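
#
# - self-contained sketch of the latch pattern used by lock() above, using the real pykka API
# - a worker sets either a value or an exception on the future, the supervisor polls it with a
#   timeout and re-raises whatever was trapped (the poll() helper is made up)
#
from pykka import ThreadingFuture, Timeout

def poll(latch):

    try:
        out = latch.get(timeout=1.0)
        if isinstance(out, Exception):
            raise out
        return out

    except Timeout:

        #
        # - nothing was set yet, loop back and poll again later
        #
        return None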
def start_controller(self, data):

    #
    # - if the termination trigger is set, abort immediately
    # - this is important as it is possible to somehow get the lock after a suspend (acquire() returns
    #   true in that case which is misleading)
    #
    if self.force_reset or self.terminate:
        raise Aborted('resetting')

    #
    # - we have the lock (e.g we are the leader)
    # - start the controller actor
    #
    data.latch = ThreadingFuture()
    logger.debug('%s : lock acquired @ %s, now leading' % (self.path, self.prefix))
    data.controller = self.model.start(data.zk, self.id, self.hints, self.scope, self.tag, self.port, data.latch)
    return 'lock', data, 0
def on(self, data):

    if data.sub and data.js and (self.strict or data.js['dependencies'] != self.last['dependencies']):

        #
        # - if we already have a process, we want to re-configure -> force a reset first
        # - this will go through a graceful termination process
        # - we'll come back here afterwards (with data.sub set to None)
        #
        raise Aborted('resetting to terminate pid %s first' % data.sub.pid)

    elif data.sub:

        #
        # - the process is already running, fail gracefully on a 200
        # - this is the code-path used for instance upon a leader request when strict is false
        #
        reply = {}, 200
        logger.debug('%s : skipping /control/on request' % self.path)
        data.latch.set(reply)

    else:

        #
        # - no process running anymore, go on with the configuration
        #
        try:
            if not self.initialized:

                #
                # - if this is the 1st time the pod is running invoke the initialize() callback
                # - this is typically used to run once-only stuff such as attaching storage volumes, etc.
                #
                logger.info('%s : initializing pod' % self.path)
                self.initialize()
                self.initialized = 1

            if data.js:

                #
                # - run the configuration procedure if we have some json
                # - we'll use whatever it returns to popen() a new process
                # - keep track of the shell command line returned by configure() for later
                # - make sure the optional overrides set by configure() are strings
                #
                cluster = _Cluster(data.js)
                logger.info('%s : configuring pod %d/%d' % (self.path, 1 + cluster.index, cluster.size))
                data.command, overrides = self.configure(cluster)
                data.env = {key: str(value) for key, value in overrides.items()}
                self.last = data.js

            assert data.command, 'request to start process while not yet configured (user error ?)'

            #
            # - spawn a new sub-process if the auto-start flag is on OR if we already ran at least once
            # - the start flag comes from the $ochopod_start environment variable
            #
            if not data.js or self.start or data.pids > 0:

                #
                # - combine our environment variables with the overrides from configure()
                # - popen() the new process and log stdout/stderr in a separate thread if required
                # - make sure to set close_fds in order to avoid sharing the flask socket with the subprocess
                # - reset the sanity check counter
                # - keep track of its pid to kill it later on
                #
                env = deepcopy(self.env)
                env.update(data.env)
                tokens = data.command if self.shell else data.command.split(' ')
                if self.pipe_subprocess:

                    #
                    # - set the popen call to use piping if required
                    # - spawn an ancillary thread to forward the lines to our logger
                    # - this thread will go down automatically when the sub-process does
                    #
                    data.sub = Popen(tokens,
                                     close_fds=True,
                                     cwd=self.cwd,
                                     env=env,
                                     shell=self.shell,
                                     stderr=STDOUT,
                                     stdout=PIPE)

                    def _pipe(process):
                        while True:
                            line = process.stdout.readline().rstrip('\n')
                            code = process.poll()
                            if line == '' and code is not None:
                                break
                            logger.info('pid %s : %s' % (process.pid, line))

                    out = Thread(target=_pipe, args=(data.sub,))
                    out.daemon = True
                    out.start()

                else:

                    #
                    # - default popen call without piping
                    #
                    data.sub = Popen(tokens,
                                     close_fds=True,
                                     cwd=self.cwd,
                                     env=env,
                                     shell=self.shell)

                data.pids += 1
                self.hints['process'] = 'running'
                logger.info('%s : popen() #%d -> started <%s> as pid %s' %
                            (self.path, data.pids, data.command, data.sub.pid))
                if data.env:
                    unrolled = '\n'.join(['\t%s -> %s' % (k, v) for k, v in data.env.items()])
                    logger.debug('%s : extra environment for pid %s ->\n%s' %
                                 (self.path, data.sub.pid, unrolled))

            reply = {}, 200
            data.latch.set(reply)

        except Exception as failure:

            #
            # - any failure trapped during the configuration -> HTTP 406
            # - the pod will shutdown automatically as well
            #
            reply = {}, 406
            logger.warning('%s : failed to configure -> %s, shutting down' % (self.path, diagnostic(failure)))
            self._request(['kill'])
            data.latch.set(reply)

    self.commands.popleft()
    return 'spin', data, 0
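
#
# - standalone version of the piping pattern used above, stdlib only (the popen_logged() helper
#   is made up)
# - stderr is folded into stdout and a daemon thread forwards each line to the logger until the
#   child exits
#
from subprocess import Popen, PIPE, STDOUT
from threading import Thread

def popen_logged(tokens, logger):

    sub = Popen(tokens, close_fds=True, stderr=STDOUT, stdout=PIPE)

    def _pipe(process):
        while True:
            line = process.stdout.readline().rstrip('\n')
            if line == '' and process.poll() is not None:
                break
            logger.info('pid %s : %s' % (process.pid, line))

    out = Thread(target=_pipe, args=(sub,))
    out.daemon = True
    out.start()
    return sub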
def spin(self, data):

    if self.terminate:
        if not data.sub:

            #
            # - kill the actor (which will release the latch and unlock the main loop)
            #
            self.exitcode()

        else:

            #
            # - this will force a reset and make sure we kill the process
            # - we'll loop back to spin() in any case and exitcode() this time
            #
            raise Aborted('terminating')

    elif self.commands:

        #
        # - we have at least one request pending
        # - pop the next command and run it (e.g switch the state-machine to it)
        #
        req, js, latch = self.commands[0]
        data.js = js
        data.latch = latch
        return req, data, 0

    elif data.sub:

        #
        # - check if the process is still running
        #
        now = time.time()
        if data.sub.poll() is None:
            if now >= data.next_sanity_check:

                #
                # - schedule the next sanity check
                # - assert if the process aborted since the last one
                #
                data.next_sanity_check = now + self.check_every
                try:
                    assert not data.failed, \
                        '%s : too many process failures (%d since last check)' % (self.path, data.failed)
                    js = self.sanity_check(data.sub.pid)
                    self.hints['metrics'] = {} if js is None else js
                    data.checks = self.checks
                    data.failed = 0

                except Exception as failure:

                    #
                    # - any failure trapped during the sanity check will decrement our counter
                    # - eventually the process is stopped (up to the user to decide what to do)
                    #
                    data.checks -= 1
                    data.failed = 0
                    logger.warning('%s : sanity check (%d/%d) failed -> %s' %
                                   (self.path, self.checks - data.checks, self.checks, diagnostic(failure)))
                    if not data.checks:
                        logger.warning('%s : turning pod off' % self.path)
                        data.checks = self.checks
                        self._request(['off'])

        else:
            code = data.sub.returncode
            if not code:

                #
                # - a successful exit code (0) will automatically force a shutdown
                # - this is a convenient way for pods to go down automatically once their task is done
                #
                logger.error('%s : pid %s exited, shutting down' % (self.path, data.sub.pid))
                self._request(['kill'])

            else:

                #
                # - the process died on a non-zero exit code
                # - increment the failure counter (too many failures in a row will fail the sanity check)
                # - restart it gracefully
                #
                data.failed += 1
                logger.error('%s : pid %s died (code %d), re-running' % (self.path, data.sub.pid, code))
                self._request(['off', 'on'])

    else:

        #
        # - reset the metrics by default if the sub-process is not running
        #
        self.hints['metrics'] = {}

    return 'spin', data, SAMPLING
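
#
# - hypothetical driver loop showing how the (state, data, delay) tuples returned by spin() and
#   friends would be consumed, the real actor machinery is more involved
# - assumes the same Aborted exception used throughout, raised by a state to force a reset
#
import time

def run(actor, data, state='initial'):

    while True:
        try:

            #
            # - look the state up by name, invoke it and honor the requested pause
            #
            state, data, delay = getattr(actor, state)(data)
            time.sleep(delay)

        except Aborted:
            break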
def on(self, data):

    if data.forked and data.js and (self.strict or data.js['dependencies'] != self.last['dependencies']):

        #
        # - if we already have a process, we want to re-configure -> force a reset first
        # - this will go through a graceful termination process
        # - we'll come back here afterwards (with data.forked set to None)
        #
        raise Aborted('resetting to terminate pid %s first' % data.forked.pid)

    elif data.forked:

        #
        # - the process is already running, fail gracefully on a 200
        # - this is the code-path used for instance upon a leader request when strict is false
        #
        logger.debug('%s : skipping /control/on request' % self.path)
        data.latch.set(200)

    else:

        #
        # - no process running anymore, go on with the configuration
        #
        try:
            if not self.initialized:

                #
                # - if this is the 1st time the pod is running invoke the initialize() callback
                # - this is typically used to run once-only stuff such as attaching storage volumes, etc.
                #
                logger.info('%s : initializing pod' % self.path)
                self.initialize()
                self.initialized = 1

            if data.js:

                #
                # - run the configuration procedure if we have some json
                # - we'll use whatever it returns to popen() a new process
                # - keep track of the shell command line returned by configure() for later
                #
                cluster = _Cluster(data.js)
                logger.info('%s : configuring pod %d/%d' % (self.path, 1 + cluster.index, cluster.size))
                data.command, data.env = self.configure(cluster)
                self.last = data.js

            assert data.command, 'request to start process while not yet configured (user error ?)'

            #
            # - combine our environment variables with the overrides from configure()
            # - popen() the new process
            # - reset the sanity check counter
            # - keep track of its pid to kill it later on
            #
            now = time.time()
            env = deepcopy(self.env)
            env.update(data.env)
            tokens = data.command if self.shell else data.command.split(' ')
            data.forked = Popen(tokens, cwd=self.cwd, env=env, shell=self.shell)
            data.checks = self.checks
            self.hints['process'] = 'running'
            logger.info('%s : started <%s> as pid %s' % (self.path, data.command, data.forked.pid))
            if data.env:
                unrolled = '\n'.join(['\t%s -> %s' % (k, v) for k, v in data.env.items()])
                logger.debug('%s : extra environment for pid %s ->\n%s' %
                             (self.path, data.forked.pid, unrolled))

            data.next_sanity_check = now + SANITY
            data.latch.set(200)

        except Exception as failure:

            #
            # - any failure trapped during the configuration -> HTTP 406
            # - the pod will shutdown automatically as well
            #
            logger.warning('%s : failed to configure -> %s, shutting down' % (self.path, diagnostic(failure)))
            self._request(['kill'])
            data.latch.set(406)

    self.commands.popleft()
    return 'spin', data, 0
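
#
# - hypothetical configure() override illustrating the contract used by on() above : return the
#   command line to popen() plus a dict of environment variable overrides
# - the cluster object exposes the index/size attributes used by the configuration log statement,
#   the settings below are arbitrary examples
#
def configure(self, cluster):

    env = \
        {
            'BROKER_ID': str(cluster.index),
            'PEERS': str(cluster.size)
        }
    return 'java -jar service.jar', env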
def spin(self, data):

    if self.terminate:
        if not data.forked:

            #
            # - kill the actor (which will release the latch and unlock the main loop)
            #
            self.exitcode()

        else:

            #
            # - this will force a reset and make sure we kill the process
            # - we'll loop back to spin() in any case and exitcode() this time
            #
            raise Aborted('terminating')

    if self.commands:

        #
        # - we have at least one request pending
        # - pop the next command and run it (e.g switch the state-machine to it)
        #
        req, js, latch = self.commands[0]
        data.js = js
        data.latch = latch
        return req, data, 0

    if data.forked:

        #
        # - no request to run
        # - check if the process is still running and run the user-defined sanity check once in a while
        #
        now = time.time()
        if data.forked.poll() is not None:
            code = data.forked.returncode
            if not code:

                #
                # - a successful exit code (0) will automatically force a shutdown
                # - this is a convenient way for pods to go down automatically once their task is done
                #
                logger.error('%s : pid %s exited, shutting down' % (self.path, data.forked.pid))
                self._request(['kill'])

            else:

                #
                # - the process died on a non-zero exit code
                # - restart it gracefully
                #
                logger.info('%s : pid %s died (code %d), re-running' % (self.path, data.forked.pid, code))
                self._request(['off', 'on'])

        elif now >= data.next_sanity_check:
            try:

                #
                # - run the sanity check and schedule the next one
                # - reset it each time
                #
                data.next_sanity_check = now + SANITY
                self.sanity_check(data.forked.pid)
                data.checks = self.checks

            except Exception as failure:

                #
                # - any failure trapped during the sanity check will decrement our counter
                # - eventually the process is stopped (up to the user to decide what to do)
                #
                data.checks -= 1
                if not data.checks:
                    self._request(['off'])
                logger.warning('%s : sanity check (%d/%d) failed -> %s' %
                               (self.path, self.checks - data.checks, self.checks, diagnostic(failure)))

    return 'spin', data, SAMPLING
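
#
# - hypothetical sanity_check() override matching the callback invoked by spin() above : assert
#   on failure (which decrements the check counter) and optionally return metrics (which the
#   piped variant further up records under hints['metrics'])
# - the health endpoint URL is made up, requests.get() is the real requests API
#
import requests

def sanity_check(self, pid):

    reply = requests.get('http://localhost:8080/health')
    assert reply.status_code == 200, 'health endpoint is down'
    return {'latency': reply.elapsed.total_seconds()}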
def spin(self, data):

    #
    # - if the termination trigger is set or if we lost our connection, abort immediately
    # - this will free the lock and another controller will take the lead
    #
    if self.terminate:
        raise Aborted('terminating')

    now = time.time()
    if self.updated:

        #
        # - the update trigger is on
        # - unset it and query the last recorded hash
        # - any difference with what we have means we need to schedule a configuration
        #
        self.updated = 0
        last, stats = self.zk.get('%s/%s.%s/hash' % (ROOT, self.scope, self.tag))
        latest = self._md5()
        bad = latest != last
        if bad and not data.dirty:

            #
            # - the hash changed, switch the dirty trigger on
            # - this will start the countdown to configuration (which can be aborted if we fall back
            #   on the same hash again, typically after a transient zookeeper connection loss)
            #
            logger.info('%s : hash changed, configuration in %2.1f seconds' % (self.path, self.damper))
            logger.debug('%s : hash -> %s' % (self.path, latest))
            data.next = now + self.damper
            data.dirty = 1

        elif not bad:

            #
            # - this case would typically map to a pod losing cnx to zk and joining again later
            # - based on how much damper we allow we can bridge transient idempotent changes
            # - very important -> make sure we set the snapshot (which could have been reset to {})
            #
            data.dirty = 0
            pods = self.snapshots['local']
            self.zk.set('%s/%s.%s/snapshot' % (ROOT, self.scope, self.tag), json.dumps(pods))
            logger.debug('%s : pod update with no hash impact (did we just reconnect to zk ?)' % self.path)

    if not data.dirty:

        #
        # - all cool, the cluster is configured
        # - set the state as 'leader'
        #
        self.hints['state'] = 'leader'

    else:

        #
        # - trigger the configuration procedure
        #
        self.hints['state'] = 'leader (configuration pending)'
        remaining = max(0, data.next - now)
        if not remaining:
            return 'config', data, 0

        #
        # - print some cool countdown
        #
        else:
            logger.debug('%s : configuration in %2.1f seconds' % (self.path, remaining))

    return 'spin', data, SAMPLING
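
#
# - sketch of what a hash such as the one compared above could look like : an md5 digest over
#   the json serialization of the local pods plus their dependencies
# - the actual _md5() implementation may differ, the md5_of() helper below is made up
#
import hashlib
import json

def md5_of(snapshots):

    #
    # - sort the keys to keep the digest stable across idempotent updates
    #
    blob = json.dumps(snapshots, sort_keys=True)
    return hashlib.md5(blob).hexdigest()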
def spin(self, data):

    #
    # - if the termination trigger is set or if we lost our connection, abort immediately
    # - this will free the lock and another controller will take the lead
    #
    if self.terminate:
        raise Aborted('terminating')

    #
    # - if it is time to run the probe callback do it now
    # - schedule the next one
    #
    now = time.time()
    if self.updated:

        #
        # - the update trigger is on
        # - unset it and query the last recorded hash
        # - any difference with what we have means we need to schedule a configuration
        #
        self.updated = 0
        last, stats = self.zk.get('%s/%s.%s/hash' % (ROOT, self.scope, self.tag))
        latest = self._md5()
        bad = latest != last
        if bad and not data.dirty:

            #
            # - the hash changed, switch the dirty trigger on
            # - this will start the countdown to configuration (which can be aborted if we fall back
            #   on the same hash again, typically after a transient zookeeper connection loss)
            #
            logger.info('%s : hash changed, configuration in %2.1f seconds' % (self.path, self.damper))
            logger.debug('%s : hash -> %s' % (self.path, latest))
            data.next = now + self.damper
            data.dirty = 1

        elif not bad:

            #
            # - this case would typically map to a pod losing cnx to zk and joining again later
            # - based on how much damper we allow we can bridge transient idempotent changes
            # - very important -> make sure we set the snapshot (which could have been reset to {})
            # - don't forget to also set data.last to enable probing
            #
            data.dirty = 0
            pods = self.snapshots['local']
            js = \
                {
                    'pods': pods,
                    'dependencies': {k: v for k, v in self.snapshots.items() if k != 'local'}
                }
            data.last = js
            data.last['key'] = str(self.id)
            self.zk.set('%s/%s.%s/snapshot' % (ROOT, self.scope, self.tag), json.dumps(pods))
            logger.debug('%s : pod update with no hash impact (did we just reconnect to zk ?)' % self.path)

    if not data.dirty:

        #
        # - all cool, the cluster is configured
        # - set the state as 'leader'
        # - fire a probe() if it is time to do so
        #
        self.hints['state'] = 'leader'
        if data.last and now > data.next_probe:
            try:

                #
                # - pass the latest cluster data to the probe() call
                # - if successful (e.g did not assert) set the status to whatever the callable returned
                # - unset if nothing was returned
                #
                snippet = self.probe(_Cluster(data.last))
                self.hints['status'] = str(snippet) if snippet else ''

            except AssertionError as failure:

                #
                # - set the status to the assert message
                #
                self.hints['status'] = '* %s' % failure

            except Exception as failure:

                #
                # - something blew up in probe(), set the status accordingly
                #
                self.hints['status'] = '* probe() failed (check the code)'
                logger.warning('%s : probe() failed -> %s' % (self.path, diagnostic(failure)))

            data.next_probe = now + self.probe_every
            if self.hints['status']:
                logger.debug('%s : probe() -> "%s"' % (self.path, self.hints['status']))

    else:

        #
        # - trigger the configuration procedure
        #
        self.hints['state'] = 'leader (configuration pending)'
        remaining = max(0, data.next - now)
        self.hints['status'] = '* configuration in %2.1f seconds' % remaining
        if not remaining:
            return 'config', data, 0

        #
        # - print some cool countdown
        #
        else:
            logger.debug('%s : configuration in %2.1f seconds' % (self.path, remaining))

    return 'spin', data, SAMPLING
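
#
# - hypothetical probe() override showing the contract used above : assert to flag a degraded
#   cluster (the message becomes the '* ...' status) or return an optional status string
# - the pod count check below is an arbitrary example
#
def probe(self, cluster):

    assert cluster.size > 1, 'running under-replicated'
    return '%d pods running' % cluster.size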