def run(self):
    """Reset the pods in self.indices by switching them off then back on.

    Records the acknowledged pod sequence ids in self.out['reset'] and sets
    self.out['ok'] on full success; failures are logged, never raised.
    """
    try:
        #
        # - first turn off the pods
        # - keep track of the indices
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/off', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        # NOTE: 'run' here is the module-level proxy helper, not this method
        pods = run(self.proxy, _query)

        #
        # - then turn those pod back on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=pods)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert pods == run(self.proxy, _query), 'one or more pods failed to switch back on'
        self.out['reset'] = pods
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Reset the pods in self.subset: off -> reset -> on.

    Each step must be acknowledged (HTTP 200) by the same set of pods or the
    whole operation is flagged as failed. Outcome goes into self.out.
    """
    try:
        # - switch the pods off first
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/off', subset=self.subset)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        js = run(self.proxy, _query)

        # - issue the actual reset (forces the pods to re-connect)
        def _query(zk):
            replies = fire(zk, self.cluster, 'reset', subset=self.subset)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'

        # - then switch them back on
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=self.subset)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'
        self.out['reset'] = js
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Reset the pods in self.indices: off, reset, then back on.

    The same pods must acknowledge every step; results land in self.out.
    """
    try:
        #
        # - first turn the pod off
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/off', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        js = run(self.proxy, _query)

        #
        # - reset it
        # - this will force a reconnection to zookeeper
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'reset', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'

        #
        # - then turn the pod back on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'
        self.out['reset'] = js
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Bounce the pods in self.indices (off then on) with a request timeout.

    The pods that acknowledged the 'off' are the only ones switched back on;
    the acknowledged sequence ids end up in self.out['reset'].
    """
    try:
        #
        # - first turn off the pods
        # - keep track of the indices
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/off', subset=self.indices, timeout=self.timeout)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        pods = run(self.proxy, _query)

        #
        # - then turn those pod back on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=pods, timeout=self.timeout)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert pods == run(
            self.proxy, _query), 'one or more pods failed to switch back on'
        self.out['reset'] = pods
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
def body(self, args, unknown, proxy):
    """Run a shell command on a subset of pods via the 'exec' endpoint.

    Any command-line token that is also a local file is uploaded with the
    request. Returns 0 when every pod replied, 1 otherwise.
    """
    assert args.force or args.indices, 'you must specify --force if -i is not set'
    if unknown is not None:
        args.cmdline += unknown
    files = {}
    headers = {'X-Shell': ' '.join(args.cmdline)}
    # - tokens that happen to be local files are shipped along as attachments
    for token in args.cmdline:
        if path.isfile(token):
            with open(token, 'rb') as f:
                files[token] = f.read()

    def _query(zk):
        replies = fire(zk, args.clusters[0], 'exec', subset=args.indices, headers=headers, files=files, timeout=args.timeout)
        return len(replies), {key: js for key, (_, js, code) in replies.items() if code == 200}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if args.json:
        logger.info(json.dumps(js))
    elif js:
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters[0], pct, len(js)))
        for key, log in js.items():
            suffix = '\n\n %s\n' % '\n '.join(log['stdout']) if log['stdout'] else ''
            logger.info('- %s (exit code %d)%s' % (key, log['code'], suffix))
    # - non-zero exit status unless every single pod replied
    return 0 if pct == 100 else 1
def body(self, args, unknown, proxy):
    """Display each pod's internal IP, public IP and the mapping for one port.

    Emits JSON with --json, otherwise a justified text table. Returns 0.
    """
    port = str(args.port[0])

    def _query(zk):
        replies = fire(zk, args.clusters, 'info')
        return len(replies), [[key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])] for key, (_, hints, code) in sorted(replies.items()) if code == 200 and port in hints['ports']]

    total, js = run(proxy, _query)
    pct = (len(js) * 100) / total if total else 0
    if args.json:
        out = {item[0]: {'ip': item[2], 'public': item[4], 'ports': item[6]} for item in js}
        logger.info(json.dumps(out))
    elif js:
        #
        # - justify & format the whole thing in a nice set of columns
        #
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(js)))
        rows = [['pod', '|', 'pod IP', '|', 'public IP', '|', 'port'], ['', '|', '', '|', '', '|', '']] + js
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    return 0
def body(self, args, proxy):
    """Print a per-pod table (IP, node, process, state) for each cluster.

    With --json all clusters are merged into one JSON blob instead.
    """
    outs = {}
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), [[key, '|', hints['ip'], '|', hints['node'], '|', hints['process'], '|', hints['state']] for key, (_, hints, code) in sorted(replies.items()) if code == 200]

        total, js = run(proxy, _query)
        # each row interleaves values and '|' separators -> even offsets hold data
        outs.update({item[0]: {'ip': item[2], 'node': item[4], 'process': item[6], 'state': item[8]} for item in js})
        if js and not args.json:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (token, pct, len(js)))
            rows = [['pod', '|', 'pod IP', '|', 'node', '|', 'process', '|', 'state'], ['', '|', '', '|', '', '|', '', '|', '']] + js
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    if args.json:
        logger.info(json.dumps(outs))
def body(self, args, unknown, proxy):
    """Display each pod's internal IP, public IP and one TCP port mapping.

    Emits JSON with --json, otherwise a justified text table. Returns 0.
    """
    port = str(args.port[0])

    def _query(zk):
        replies = fire(zk, args.clusters, 'info')
        return len(replies), [[key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])] for key, (_, hints, code) in sorted(replies.items()) if code == 200 and port in hints['ports']]

    total, js = run(proxy, _query)
    pct = (len(js) * 100) / total if total else 0
    if args.json:
        out = {item[0]: {'ip': item[2], 'public': item[4], 'ports': item[6]} for item in js}
        logger.info(json.dumps(out))
    elif js:
        #
        # - justify & format the whole thing in a nice set of columns
        #
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(js)))
        rows = [['pod', '|', 'pod IP', '|', 'public IP', '|', 'TCP'], ['', '|', '', '|', '', '|', '']] + js
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    return 0
def run(self):
    """Switch the pods in self.indices on and record the outcome.

    Sets self.out['on'] to the acknowledging pod sequence ids and
    self.out['ok'] when every pod replied. Failures are logged, not raised.
    """
    try:
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=self.indices, timeout=self.timeout)
            return len(replies), [seq for seq, (_, _, code) in replies.items() if code == 200]

        total, js = run(self.proxy, _query)
        # fixed message: this is the switch-ON path (was '... failed to stop')
        assert len(js) == total, '1 or more pod failed to switch on'
        self.out['on'] = js
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to switch on -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to switch on -> %s' % (self.cluster, diagnostic(failure)))
def body(self, args, proxy):
    """Show a summary table (pod IP, process, state) for each cluster."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), \
                [[key, '|', hints['ip'], '|', hints['process'], '|', hints['state']] for key, (_, hints, code) in replies.items() if code == 200]

        total, js = run(proxy, _query)
        if not total:
            logger.info('\n<%s> -> no pods found' % token)
        else:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            logger.info('\n<%s> -> %d%% replies (%d pods total) ->\n' % (token, pct, total))
            # NOTE(review): the first column header says 'cluster' but rows hold
            # individual pod keys — confirm the intended label
            rows = [['cluster', '|', 'pod IP', '|', 'process', '|', 'state'], ['', '|', '', '|', '', '|', '']] + js
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(" ".join((val.ljust(width) for val, width in zip(row, widths))))
def body(self, args, proxy):
    """Collect and display the user metrics each pod reports via its info endpoint.

    Prints a justified table per cluster, or one merged JSON blob with --json.
    """
    #
    # - grab the user metrics returned in sanity_check()
    # - those are returned via a POST /info
    #
    outs = {}
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), {key: hints['metrics'] for key, (index, hints, code) in replies.items() if code == 200 and 'metrics' in hints}

        total, js = run(proxy, _query)
        outs.update(js)

        #
        # - prettify if not asked for a json string
        #
        if js and not args.json:
            pct = (len(js) * 100) / total
            logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
            # items() instead of the Python-2-only iteritems() (consistent with
            # the other formatting helpers in this file)
            rows = [['pod', '|', 'metrics'], ['', '|', '']] + sorted([[key, '|', json.dumps(val)] for key, val in js.items()])
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    if args.json:
        logger.info(json.dumps(outs))
def body(self, args, _, proxy):
    """Display the user metrics reported by the pods of the given clusters.

    Emits raw JSON with --json, otherwise a justified table. Returns 0.
    """
    def _query(zk):
        replies = fire(zk, args.clusters, 'info')
        return len(replies), {key: hints['metrics'] for key, (index, hints, code) in replies.items() if code == 200 and 'metrics' in hints}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if args.json:
        logger.info(json.dumps(js))
    elif js:
        #
        # - justify & format the whole thing in a nice set of columns
        #
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(js)))
        # items() instead of the Python-2-only iteritems() (consistent with the
        # rest of the file)
        rows = [['pod', '|', 'metrics'], ['', '|', '']] + sorted([[key, '|', json.dumps(val)] for key, val in js.items()])
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    return 0
def body(self, args, proxy):
    """Load a YAML payload and POST it as a 'control/signal' to every cluster.

    Prints either the merged replies as JSON or a reply-percentage summary.
    """
    try:
        with open(args.yaml[0], 'r') as f:
            # NOTE(review): yaml.load() without an explicit Loader can execute
            # arbitrary constructors on untrusted input — consider
            # yaml.safe_load() (confirm the payloads are plain YAML first)
            payload = yaml.load(f)
            total = 0
            merged = {}
            for token in args.clusters:
                def _query(zk):
                    replies = fire(zk, token, 'control/signal', js=json.dumps(payload))
                    return len(replies), {key: data for key, (_, data, code) in replies.items() if code == 200}

                pods, js = run(proxy, _query)
                merged.update(js)
                total += pods

            pct = (len(merged) * 100) / total if total else 0
            logger.info(json.dumps(merged) if args.json else '%d%% replies, pinged %d pods' % (pct, len(merged)))

    except IOError:

        logger.info('unable to load %s' % args.yaml[0])

    except YAMLError as failure:

        if hasattr(failure, 'problem_mark'):
            mark = failure.problem_mark
            assert 0, '%s is invalid (line %s, column %s)' % (args.yaml, mark.line+1, mark.column+1)
def body(self, args, proxy):
    """Dump the full 'info' hints for every pod, pretty-printed as JSON."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

        total, js = run(proxy, _query)
        if not total:
            logger.info('\n<%s> -> no pods found' % token)
        else:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            unrolled = ['%s\n%s\n' % (k, json.dumps(js[k], indent=4, separators=(',', ': '))) for k in sorted(js.keys())]
            logger.info('\n<%s> -> %d%% replies (%d pods total) ->\n\n- %s' % (token, pct, total, '\n- '.join(unrolled)))
def _spin():
    """Poll until every pod reports a process state listed in `target`.

    NOTE(review): closure fragment — `self`, `target` and `capacity` are bound
    in the enclosing scope; `capacity` presumably holds the expected pod count.
    Raises AssertionError (caught by the surrounding retry logic) until done.
    """
    def _query(zk):
        replies = fire(zk, self.cluster, 'info')
        return [(hints['process'], seq) for seq, hints, _ in replies.values() if hints['process'] in target]

    js = run(self.proxy, _query)
    assert len(js) == capacity, 'not all pods running yet'
    return js
def _spin():
    """Poll until the cluster reports exactly `target` pods.

    NOTE(review): closure fragment — `self` and `target` come from the
    enclosing scope. AssertionError signals 'not ready yet' to the retry loop.
    """
    def _query(zk):
        replies = fire(zk, self.cluster, 'info')
        return [seq for seq, _, _ in replies.values()]

    js = run(self.proxy, _query)
    assert len(js) == target, 'not all pods running yet'
    return js
def _spin():
    """Poll until self.pods pods of the right application reach a target state.

    NOTE(review): closure fragment — `qualified`, `application` and `target`
    are bound in the enclosing scope.
    """
    def _query(zk):
        replies = fire(zk, qualified, 'info')
        return [(hints['process'], seq) for seq, hints, _ in replies.values() if hints['application'] == application and hints['process'] in target]

    js = run(self.proxy, _query)
    assert len(js) == self.pods, 'not all pods running yet'
    return js
def run(self):
    """Reset the pods in self.indices: off, reset, then back on.

    Every step must be acknowledged (HTTP 200) by the same pods or the whole
    operation fails; outcome is recorded in self.out and only logged on error.
    """
    try:
        #
        # - first turn the pod off
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/off', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        js = run(self.proxy, _query)

        #
        # - reset it
        # - this will force a reconnection to zookeeper
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'reset', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'

        #
        # - then turn the pod back on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=self.indices)
            return [seq for _, (seq, _, code) in replies.items() if code == 200]

        assert js == run(self.proxy, _query), 'one or more pods did not respond'
        self.out['reset'] = js
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
def body(self, args, proxy):
    """Fetch and print the last 64 log lines of every pod in each cluster."""
    for cluster in args.clusters:
        def _query(zk):
            return fire(zk, cluster, 'log')

        replies = run(proxy, _query)
        for pod in sorted(replies):
            _, log, code = replies[pod]
            if code != 200:
                continue
            tail = '- '.join(log[-64:])
            logger.info('\n%s ->\n\n- %s' % (pod, tail))
def body(self, args, proxy):
    """Switch a subset of pods on in each cluster and report the reply ratio."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'control/on', subset=args.indices)
            return len(replies), [pod for pod, (_, _, code) in replies.items() if code == 200]

        total, js = run(proxy, _query)
        if js:
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies, %d pods on' % (token, pct, len(js)))
def _spin():
    """Poll until the expected number of pods is visible.

    NOTE(review): closure fragment — `self`, `target` and `app` are bound in
    the enclosing scope; when self.group is set only pods whose application
    matches `app` are counted.
    """
    def _query(zk):
        replies = fire(zk, self.cluster, 'info')
        return [(seq, hints['application'], hints['task']) for (seq, hints, _) in replies.values()]

    js = run(self.proxy, _query)
    if self.group is not None:
        nb_pods = sum(1 for (_, key, _) in js if key == app)
    else:
        nb_pods = len(js)
    assert nb_pods == target, 'not all pods running yet'
    return js
def body(self, args, proxy):
    """Print how many pods exist overall and which ones report 'running'."""
    def _query(zk):
        replies = fire(zk, '*', 'info')
        states = {}
        for key, (_, hints, code) in replies.items():
            if code == 200:
                states[key] = hints['process']
        return states

    states = run(proxy, _query)
    if not states:
        logger.info('\n0 pods')
        return

    running = sorted(pod for pod, state in states.items() if state == 'running')
    pct = int((100 * len(running)) / len(states))
    logger.info('\n%d pods, %d%% running ->\n - %s' % (len(states), pct, '\n - '.join(running)))
def body(self, args, proxy):
    """Print pod logs for each cluster (whole log with --long, else last 16)."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'log', subset=args.indices)
            return len(replies), {key: log for key, (_, log, code) in replies.items() if code == 200}

        total, js = run(proxy, _query)
        if js:
            pct = ((len(js) * 100) / total)
            unrolled = ['- %s\n\n %s' % (key, ' '.join(log if args.long else log[-16:])) for key, log in js.items()]
            logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (token, pct, len(js), '\n'.join(unrolled)))
def _spin():
    """Kill the pods in self.indices and wait until all report HTTP 410 (GONE).

    NOTE(review): closure fragment — `self` is bound in the enclosing scope.
    Returns the sequence ids of the killed pods once they are all gone.
    """
    def _query(zk):
        replies = fire(zk, self.cluster, 'control/kill', subset=self.indices, timeout=self.timeout)
        return [(code, seq) for seq, _, code in replies.values()]

    #
    # - fire the request one or more pods
    # - wait for every pod to report back a HTTP 410 (GONE)
    # - this means the ochopod state-machine is now idling (e.g dead)
    #
    js = run(self.proxy, _query)
    gone = sum(1 for code, _ in js if code == 410)
    assert gone == len(js), 'at least one pod is still running'
    return [seq for _, seq in js]
def body(self, args, proxy):
    """Switch pods off (explicit subset, or all with --force) per cluster."""
    assert args.force or args.subset, 'you must specify --force if -i is not set'
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'control/off', subset=args.subset)
            return len(replies), [pod for pod, (_, _, code) in replies.items() if code == 200]

        total, js = run(proxy, _query)
        if js:
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies, %d pods off' % (token, pct, len(js)))
def _spin():
    """Kill all the cluster's pods and wait until every one reports HTTP 410.

    NOTE(review): closure fragment — `self` is bound in the enclosing scope.
    Returns nothing; AssertionError signals 'still running' to the retry loop.
    """
    def _query(zk):
        replies = fire(zk, self.cluster, 'control/kill', timeout=self.timeout)
        return [(code, seq) for seq, _, code in replies.values()]

    #
    # - fire the request one or more pods
    # - wait for every pod to report back a HTTP 410 (GONE)
    # - this means the ochopod state-machine is now idling (e.g dead)
    #
    js = run(self.proxy, _query)
    gone = sum(1 for code, _ in js if code == 410)
    assert gone == len(js), 'at least one pod is still running'
    return
def body(self, args, proxy):
    """Switch every pod of each cluster off and report the reply ratio."""
    for cluster in args.clusters:
        def _query(zk):
            replies = fire(zk, cluster, 'control/off')
            acked = [pod for pod, (_, _, code) in replies.items() if code == 200]
            return len(replies), acked

        total, acked = run(proxy, _query)
        if not total:
            logger.info("\n<%s> -> no pods found" % cluster)
            continue

        pct = (len(acked) * 100) / total
        logger.info('\n<%s> -> %d%% replies, %d pods off' % (cluster, pct, len(acked)))
def body(self, args, proxy):
    """Print a per-pod table (IP, node, process, state) for each cluster.

    With --json all clusters are merged into one JSON blob instead.
    """
    outs = {}
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), [[key, '|', hints['ip'], '|', hints['node'], '|', hints['process'], '|', hints['state']] for key, (_, hints, code) in sorted(replies.items()) if code == 200]

        total, js = run(proxy, _query)
        # rows interleave values and '|' separators -> even offsets hold data
        outs.update({item[0]: {'ip': item[2], 'node': item[4], 'process': item[6], 'state': item[8]} for item in js})
        if js and not args.json:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (token, pct, len(js)))
            rows = [['pod', '|', 'pod IP', '|', 'node', '|', 'process', '|', 'state'], ['', '|', '', '|', '', '|', '', '|', '']] + js
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    if args.json:
        logger.info(json.dumps(outs))
def body(self, args, _, proxy):
    """Aggregate pod info per cluster: total vs running counts plus status."""
    def _query(zk):
        replies = fire(zk, '*', 'info')
        return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

    out = {}
    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    for key, hints in js.items():
        # presumably keys look like '<cluster> <index>'; keep the cluster prefix
        qualified = key.split(' ')[0]
        if not qualified in out:
            out[qualified] = \
                {
                    'total': 0,
                    'running': 0,
                    'status': ''
                }
        item = out[qualified]
        item['total'] += 1
        if hints['process'] == 'running':
            item['running'] += 1
        # last non-empty status wins for the cluster
        if 'status' in hints and hints['status']:
            item['status'] = hints['status']

    if args.json:
        logger.info(json.dumps(out))
    elif js:
        logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
        unrolled = [[key, '|', '%d/%d' % (item['running'], item['total']), '|', item['status']] for key, item in sorted(out.items())]
        rows = [['cluster', '|', 'ok', '|', 'status'], ['', '|', '', '|', '']] + unrolled
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    return 0
def body(self, args, _, proxy):
    """Print pod logs for the given clusters (last 16 lines unless --long)."""
    def _query(zk):
        replies = fire(zk, args.clusters, 'log', subset=args.indices)
        return len(replies), {key: log for key, (_, log, code) in replies.items() if code == 200}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if js:
        #
        # - justify & format the whole thing
        #
        unrolled = ['- %s\n\n %s' % (key, ' '.join(log if args.long else log[-16:])) for key, log in js.items()]
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (args.clusters, pct, len(js), '\n'.join(unrolled)))
    return 0
def body(self, args, proxy):
    """Switch a subset of pods on in each cluster and report the reply ratio."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'control/on', subset=args.indices)
            return len(replies), [pod for pod, (_, _, code) in replies.items() if code == 200]

        total, js = run(proxy, _query)
        if js:
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies, %d pods on' % (token, pct, len(js)))
def body(self, args, unknown, proxy):
    """Run a shell command on a subset of pods via the 'exec' endpoint.

    Any command-line token that is also a local file is uploaded with the
    request. Returns 0 when every pod replied, 1 otherwise.
    """
    assert args.force or args.indices, 'you must specify --force if -i is not set'
    if unknown is not None:
        args.cmdline += unknown
    files = {}
    headers = {'X-Shell': ' '.join(args.cmdline)}
    # - tokens that happen to be local files are shipped along as attachments
    for token in args.cmdline:
        if path.isfile(token):
            with open(token, 'rb') as f:
                files[token] = f.read()

    def _query(zk):
        replies = fire(zk, args.clusters[0], 'exec', subset=args.indices, headers=headers, files=files, timeout=args.timeout)
        return len(replies), {key: js for key, (_, js, code) in replies.items() if code == 200}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if args.json:
        logger.info(json.dumps(js))
    elif js:
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters[0], pct, len(js)))
        for key, log in js.items():
            suffix = '\n\n %s\n' % '\n '.join(log['stdout']) if log['stdout'] else ''
            logger.info('- %s (exit code %d)%s' % (key, log['code'], suffix))
    # - non-zero exit status unless every single pod replied
    return 0 if pct == 100 else 1
def body(self, args, proxy):
    """Show how the pods spread across nodes (pod-count percentage per node)."""
    def _query(zk):
        replies = fire(zk, '*', 'info')
        return len(replies), [hints['node'] for _, (_, hints, code) in replies.items() if code == 200]

    total, js = run(proxy, _query)
    if js:
        # tally how many pods sit on each node
        rollup = {key: 0 for key in set(js)}
        for node in js:
            rollup[node] += 1
        pct = (100 * len(js)) / total
        logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
        unrolled = [[key, '|', '%d%%' % ((100 * n) / total)] for key, n in sorted(rollup.items())]
        rows = [['node', '|', 'load'], ['', '|', '']] + unrolled
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
def run(self):
    """Switch the pods in self.indices on and record the outcome.

    Sets self.out['on'] to the acknowledging pod sequence ids and
    self.out['ok'] when every pod replied. Failures are logged, not raised.
    """
    try:
        def _query(zk):
            replies = fire(zk, self.cluster, 'control/on', subset=self.indices, timeout=self.timeout)
            return len(replies), [seq for seq, (_, _, code) in replies.items() if code == 200]

        total, js = run(self.proxy, _query)
        # fixed message: this is the switch-ON path (was '... failed to stop')
        assert len(js) == total, '1 or more pod failed to switch on'
        self.out['on'] = js
        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to switch on -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to switch on -> %s' % (self.cluster, diagnostic(failure)))
def body(self, args, proxy):
    """Load a YAML payload and POST it as a 'control/signal' to every cluster.

    Prints either the merged replies as JSON or a reply-percentage summary.
    """
    try:
        with open(args.yaml[0], 'r') as f:
            # NOTE(review): yaml.load() without an explicit Loader can execute
            # arbitrary constructors on untrusted input — consider
            # yaml.safe_load() (confirm the payloads are plain YAML first)
            payload = yaml.load(f)
            total = 0
            merged = {}
            for token in args.clusters:
                def _query(zk):
                    replies = fire(zk, token, 'control/signal', js=json.dumps(payload))
                    return len(replies), {key: data for key, (_, data, code) in replies.items() if code == 200}

                pods, js = run(proxy, _query)
                merged.update(js)
                total += pods

            pct = (len(merged) * 100) / total if total else 0
            logger.info(json.dumps(merged) if args.json else '%d%% replies, pinged %d pods' % (pct, len(merged)))

    except IOError:

        logger.info('unable to load %s' % args.yaml[0])

    except YAMLError as failure:

        if hasattr(failure, 'problem_mark'):
            mark = failure.problem_mark
            assert 0, '%s is invalid (line %s, column %s)' % (args.yaml, mark.line + 1, mark.column + 1)
def body(self, args, proxy):
    """Print per-cluster port mapping tables (pod IP, public IP, TCP port)."""
    port = str(args.port[0])
    for cluster in args.clusters:
        def _query(zk):
            replies = fire(zk, cluster, 'info')
            return len(replies), [[key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])] for key, (_, hints, code) in sorted(replies.items()) if code == 200 and port in hints['ports']]

        total, js = run(proxy, _query)
        if js:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (cluster, pct, len(js)))
            # the separator row must have as many cells as the header (7):
            # zip(*rows) truncates to the shortest row, so the old 5-cell
            # separator silently dropped the 'public IP' and 'TCP' columns
            rows = [['pod', '|', 'pod IP', '|', 'public IP', '|', 'TCP'], ['', '|', '', '|', '', '|', '']] + js
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
def body(self, args, proxy):
    """Dump the full 'info' hints for every pod, pretty-printed as JSON."""
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'info')
            return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

        total, js = run(proxy, _query)
        if not total:
            logger.info('\n<%s> -> no pods found' % token)
        else:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            unrolled = ['%s\n%s\n' % (k, json.dumps(js[k], indent=4, separators=(',', ': '))) for k in sorted(js.keys())]
            logger.info('\n<%s> -> %d%% replies (%d pods total) ->\n\n- %s' % (token, pct, total, '\n- '.join(unrolled)))
def body(self, args, proxy):
    """Switch pods off (explicit indices, or all with --force) per cluster."""
    assert args.force or args.indices, 'you must specify --force if -i is not set'
    for token in args.clusters:
        def _query(zk):
            replies = fire(zk, token, 'control/off', subset=args.indices)
            return len(replies), [pod for pod, (_, _, code) in replies.items() if code == 200]

        total, js = run(proxy, _query)
        if js:
            pct = (len(js) * 100) / total
            logger.info('<%s> -> %d%% replies, %d pods off' % (token, pct, len(js)))
def body(self, args, proxy):
    """Summarize every cluster: running vs total pod counts plus last status."""
    def _query(zk):
        replies = fire(zk, '*', 'info')
        return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

    total, js = run(proxy, _query)
    if js:
        out = {}
        for key, hints in js.items():
            # presumably keys look like '<cluster> <index>'; keep the prefix
            qualified = key.split(' ')[0]
            if not qualified in out:
                out[qualified] = \
                    {
                        'total': 0,
                        'running': 0,
                        'status': ''
                    }
            item = out[qualified]
            item['total'] += 1
            if hints['process'] == 'running':
                item['running'] += 1
            # last non-empty status wins for the cluster
            if 'status' in hints and hints['status']:
                item['status'] = hints['status']

        if args.json:
            logger.info(json.dumps(out))
        else:
            pct = (100 * len(js)) / total
            logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
            unrolled = [[key, '|', '%d/%d' % (item['running'], item['total']), '|', item['status']] for key, item in sorted(out.items())]
            rows = [['cluster', '|', 'ok', '|', 'status'], ['', '|', '', '|', '']] + unrolled
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
def body(self, args, _, proxy):
    """Display the user metrics reported by the pods of the given clusters.

    Emits raw JSON with --json, otherwise a justified table. Returns 0.
    """
    def _query(zk):
        replies = fire(zk, args.clusters, 'info')
        return len(replies), {key: hints['metrics'] for key, (index, hints, code) in replies.items() if code == 200 and 'metrics' in hints}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if args.json:
        logger.info(json.dumps(js))
    elif js:
        #
        # - justify & format the whole thing in a nice set of columns
        #
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(js)))
        # items() instead of the Python-2-only iteritems() (consistent with the
        # rest of the file)
        rows = [['pod', '|', 'metrics'], ['', '|', '']] + sorted([[key, '|', json.dumps(val)] for key, val in js.items()])
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
    return 0
def body(self, args, proxy):
    """Show how the pods spread across nodes (pod-count percentage per node)."""
    def _query(zk):
        replies = fire(zk, '*', 'info')
        return len(replies), [hints['node'] for _, (_, hints, code) in replies.items() if code == 200]

    total, js = run(proxy, _query)
    if js:
        # tally how many pods sit on each node
        rollup = {key: 0 for key in set(js)}
        for node in js:
            rollup[node] += 1
        pct = (100 * len(js)) / total
        logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
        unrolled = [[key, '|', '%d%%' % ((100 * n) / total)] for key, n in sorted(rollup.items())]
        rows = [['node', '|', 'load'], ['', '|', '']] + unrolled
        widths = [max(map(len, col)) for col in zip(*rows)]
        for row in rows:
            logger.info(' '.join((val.ljust(width) for val, width in zip(row, widths))))
def body(self, args, _, proxy):
    """Print pod logs for the given clusters (last 16 lines unless --long)."""
    def _query(zk):
        replies = fire(zk, args.clusters, 'log', subset=args.indices)
        return len(replies), {key: log for key, (_, log, code) in replies.items() if code == 200}

    total, js = run(proxy, _query)
    pct = ((len(js) * 100) / total) if total else 0
    if js:
        #
        # - justify & format the whole thing
        #
        unrolled = ['- %s\n\n %s' % (key, ' '.join(log if args.long else log[-16:])) for key, log in js.items()]
        logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (args.clusters, pct, len(js), '\n'.join(unrolled)))
    return 0
def body(self, args, proxy):
    """Display IP/port mappings for each cluster, merged to JSON with --json."""
    outs = {}
    port = str(args.port[0])
    for cluster in args.clusters:
        def _query(zk):
            replies = fire(zk, cluster, "info")
            return (
                len(replies),
                [
                    [key, "|", hints["ip"], "|", hints["public"], "|", str(hints["ports"][port])]
                    for key, (_, hints, code) in sorted(replies.items())
                    if code == 200 and port in hints["ports"]
                ],
            )

        total, js = run(proxy, _query)
        # rows interleave values and "|" separators -> even offsets hold data
        outs.update({item[0]: {"ip": item[2], "public": item[4], "ports": item[6]} for item in js})
        if js and not args.json:
            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (len(js) * 100) / total
            logger.info("<%s> -> %d%% replies (%d pods total) ->\n" % (cluster, pct, len(js)))
            rows = [["pod", "|", "pod IP", "|", "public IP", "|", "TCP"], ["", "|", "", "|", "", "|", ""]] + js
            widths = [max(map(len, col)) for col in zip(*rows)]
            for row in rows:
                logger.info(" ".join((val.ljust(width) for val, width in zip(row, widths))))
    if args.json:
        logger.info(json.dumps(outs))
def run(self):
    """Bump the cluster's pods to a new docker image version via Marathon.

    Workflow: resolve the single Marathon application backing the cluster,
    fetch its latest configuration, kill every pod, PUT the updated image
    tag, then wait until the expected number of pods is back up. Results are
    recorded in self.out; failures are logged, never raised.
    """
    try:
        #
        # - we need to pass the framework master IPs around (ugly)
        #
        assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
        master = choice(os.environ['MARATHON_MASTER'].split(','))
        headers = \
            {
                'content-type': 'application/json',
                'accept': 'application/json'
            }

        #
        # - first peek and see what pods we have
        # - they should all map to one single marathon application (abort if not)
        # - we'll use the application identifier to retrieve the configuration json later on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'info')
            return [hints['application'] for (_, hints, _) in replies.values()]

        js = run(self.proxy, _query)
        assert len(set(js)) == 1, '%s is mapping to 2+ marathon applications' % self.cluster
        app = js[0]

        #
        # - fetch the various versions for our app
        # - we want to get hold of the most recent configuration
        #
        url = 'http://%s/v2/apps/%s/versions' % (master, app)
        reply = get(url, headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))
        # NOTE(review): message says 'delete failed' but this is a GET — likely
        # a copy/paste leftover
        assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code
        js = reply.json()

        #
        # - retrieve the latest one
        # - keep the docker container configuration and the # of tasks around
        #
        last = js['versions'][0]
        url = 'http://%s/v2/apps/%s/versions/%s' % (master, app, last)
        reply = get(url, headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))
        assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code
        js = reply.json()
        spec = js['container']
        tag = spec['docker']['image']
        capacity = js['instances']

        #
        # - kill all the pods using a POST /control/kill
        # - wait for them to be dead
        #
        @retry(timeout=self.timeout, pause=0)
        def _spin():
            def _query(zk):
                replies = fire(zk, self.cluster, 'control/kill', timeout=self.timeout)
                return [(code, seq) for seq, _, code in replies.values()]

            #
            # - fire the request one or more pods
            # - wait for every pod to report back a HTTP 410 (GONE)
            # - this means the ochopod state-machine is now idling (e.g dead)
            #
            js = run(self.proxy, _query)
            gone = sum(1 for code, _ in js if code == 410)
            assert gone == len(js), 'at least one pod is still running'
            return

        _spin()

        #
        # - grab the docker image
        # - just add a :<version> suffix (or replace it) but don't change the image proper
        # - update the image and PUT the new configuration back
        # - marathon will then kill & re-start all the tasks
        #
        tokens = tag.split(':')
        spec['docker']['image'] = \
            '%s:%s' % (tag, self.version) if len(tokens) < 2 else '%s:%s' % (tokens[0], self.version)
        js = \
            {
                'container': spec
            }
        url = 'http://%s/v2/apps/%s' % (master, app)
        reply = put(url, data=json.dumps(js), headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))
        logger.debug(reply.text)
        assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

        #
        # - the pods should now be starting
        # - wait for all the pods to be in the 'running' mode (they are 'dead' right now)
        # - the sequence counters allocated to our new pods are returned as well
        #
        target = ['running'] if self.strict else ['stopped', 'running']

        @retry(timeout=self.timeout, pause=3, default={})
        def _spin():
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [(hints['process'], seq) for seq, hints, _ in replies.values() if hints['process'] in target]

            js = run(self.proxy, _query)
            assert len(js) == capacity, 'not all pods running yet'
            return js

        js = _spin()
        up = [seq for _, seq in js]
        assert len(up) == capacity, '1+ pods still not up (%d/%d)' % (len(up), capacity)
        self.out['up'] = up
        self.out['ok'] = True
        logger.debug('%s : %d pods updated to version "%s"' % (self.cluster, capacity, self.version))

    except AssertionError as failure:

        logger.debug('%s : failed to bump -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to bump -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Scale the marathon application backing this cluster up or down.

    The scaling factor (self.factor) is either absolute ('@<n>') or a
    multiplier ('x<n>') applied to the current pod count.  Scaling up is a
    simple marathon 'instances' PUT; scaling down kills selected pods via
    POST /control/kill then deletes their marathon tasks.

    Results are reported via self.out ('delta' -> pod count change,
    'ok' -> True on success).  Failures are logged and swallowed.
    """
    try:

        #
        # - we need to pass the framework master IPs around (ugly)
        #
        assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
        master = choice(os.environ['MARATHON_MASTER'].split(','))
        headers = \
            {
                'content-type': 'application/json',
                'accept': 'application/json'
            }

        #
        # - first peek and see what pods we have
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'info')
            return [(seq, hints['application'], hints['task']) for (seq, hints, _) in replies.values()]

        #
        # - remap a bit differently and get an ordered list of task identifiers
        # - we'll use that to kill the newest pods
        #
        js = run(self.proxy, _query)
        total = len(js)
        if self.group is not None:

            #
            # - if -g was specified apply the scaling to the underlying marathon application containing that pod
            # - be careful to update the task list and total # of pods
            #
            keys = {seq: key for (seq, key, _) in js}
            assert self.group in keys, '#%d is not a valid pod index' % self.group
            app = keys[self.group]
            tasks = [(seq, task) for (seq, key, task) in sorted(js, key=(lambda _: _[0])) if key == app]
            total = sum(1 for (_, key, _) in js if key == app)

        else:

            #
            # - check and make sure all our pods map to one single marathon application
            #
            keys = set([key for (_, key, _) in js])
            assert len(keys) == 1, '%s maps to more than one application, you must specify -g' % self.cluster
            tasks = [(seq, task) for (seq, _, task) in sorted(js, key=(lambda _: _[0]))]
            app = keys.pop()

        #
        # - infer the target # of pods based on the user-defined factor
        #
        operator = self.factor[0]
        assert operator in ['@', 'x'], 'invalid operator'
        n = float(self.factor[1:])
        target = n if operator == '@' else total * n

        #
        # - clip the target # of pods down to 1
        #
        target = max(1, int(target))
        self.out['delta'] = target - total
        if target > total:

            #
            # - scale the application capacity up
            #
            js = \
                {
                    'instances': target
                }
            url = 'http://%s/v2/apps/%s' % (master, app)
            reply = put(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

            #
            # - wait for all our new pods to be there
            #
            @retry(timeout=self.timeout, pause=3, default={})
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    return [seq for seq, _, _ in replies.values()]

                js = run(self.proxy, _query)
                assert len(js) == target, 'not all pods running yet'
                return js

            _spin()

        elif target < total:

            #
            # - if the fifo switch is on make sure to pick the oldest pods for deletion
            #   (tasks is sorted by ascending sequence index, so the head holds the oldest pods)
            #
            tasks = tasks[:total - target] if self.fifo else tasks[target:]

            #
            # - kill all (or part of) the pods using a POST /control/kill
            # - wait for them to be dead
            #
            @retry(timeout=self.timeout, pause=0)
            def _spin():
                def _query(zk):
                    indices = [seq for (seq, _) in tasks]
                    replies = fire(zk, self.cluster, 'control/kill', subset=indices, timeout=self.timeout)
                    return [(code, seq) for seq, _, code in replies.values()]

                #
                # - fire the request to one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                js = run(self.proxy, _query)
                gone = sum(1 for code, _ in js if code == 410)
                assert gone == len(js), 'at least one pod is still running'
                return

            _spin()

            #
            # - delete all the underlying tasks at once using POST v2/tasks/delete
            #
            js = \
                {
                    'ids': [task for (_, task) in tasks]
                }
            url = 'http://%s/v2/tasks/delete?scale=true' % master
            reply = post(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code

        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to scale -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to scale -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Scale the marathon application backing this cluster up or down.

    The scaling factor (self.factor) is either absolute ('@<n>') or a
    multiplier ('x<n>') applied to the current pod count.  Scaling up is a
    simple marathon 'instances' PUT; scaling down kills selected pods via
    POST /control/kill then deletes their marathon tasks.

    Results are reported via self.out ('delta' -> pod count change,
    'ok' -> True on success).  Failures are logged and swallowed.
    """
    try:

        #
        # - we need to pass the framework master IPs around (ugly)
        #
        assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
        master = choice(os.environ['MARATHON_MASTER'].split(','))
        headers = \
            {
                'content-type': 'application/json',
                'accept': 'application/json'
            }

        #
        # - first peek and see what pods we have
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'info')
            return [(seq, hints['application'], hints['task']) for (seq, hints, _) in replies.values()]

        #
        # - remap a bit differently and get an ordered list of task identifiers
        # - we'll use that to kill the newest pods
        #
        js = run(self.proxy, _query)
        total = len(js)
        if self.group is not None:

            #
            # - if -g was specified apply the scaling to the underlying marathon application containing that pod
            # - be careful to update the task list and total # of pods
            #
            keys = {seq: key for (seq, key, _) in js}
            assert self.group in keys, '#%d is not a valid pod index' % self.group
            app = keys[self.group]
            tasks = [(seq, task) for (seq, key, task) in sorted(js, key=(lambda _: _[0])) if key == app]
            total = sum(1 for (_, key, _) in js if key == app)

        else:

            #
            # - check and make sure all our pods map to one single marathon application
            #
            keys = set([key for (_, key, _) in js])
            assert len(keys) == 1, '%s maps to more than one application, you must specify -g' % self.cluster
            tasks = [(seq, task) for (seq, _, task) in sorted(js, key=(lambda _: _[0]))]
            app = keys.pop()

        #
        # - infer the target # of pods based on the user-defined factor
        #
        operator = self.factor[0]
        assert operator in ['@', 'x'], 'invalid operator'
        n = float(self.factor[1:])
        target = n if operator == '@' else total * n

        #
        # - clip the target # of pods down to 1
        #
        target = max(1, int(target))
        self.out['delta'] = target - total
        if target > total:

            #
            # - scale the application capacity up
            #
            js = \
                {
                    'instances': target
                }
            url = 'http://%s/v2/apps/%s' % (master, app)
            reply = put(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

            #
            # - wait for all our new pods to be there
            #
            @retry(timeout=self.timeout, pause=3, default={})
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    return [seq for seq, _, _ in replies.values()]

                js = run(self.proxy, _query)
                assert len(js) == target, 'not all pods running yet'
                return js

            _spin()

        elif target < total:

            #
            # - if the fifo switch is on make sure to pick the oldest pods for deletion
            #   (tasks is sorted by ascending sequence index, so the head holds the oldest pods)
            #
            tasks = tasks[:total - target] if self.fifo else tasks[target:]

            #
            # - kill all (or part of) the pods using a POST /control/kill
            # - wait for them to be dead
            #
            @retry(timeout=self.timeout, pause=0)
            def _spin():
                def _query(zk):
                    indices = [seq for (seq, _) in tasks]
                    replies = fire(zk, self.cluster, 'control/kill', subset=indices, timeout=self.timeout)
                    return [(code, seq) for seq, _, code in replies.values()]

                #
                # - fire the request to one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                js = run(self.proxy, _query)
                gone = sum(1 for code, _ in js if code == 410)
                assert gone == len(js), 'at least one pod is still running'
                return

            _spin()

            #
            # - delete all the underlying tasks at once using POST v2/tasks/delete
            #
            js = \
                {
                    'ids': [task for (_, task) in tasks]
                }
            url = 'http://%s/v2/tasks/delete?scale=true' % master
            reply = post(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code

        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to scale -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to scale -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Kill one or more pods and reclaim their marathon resources.

    Each targeted pod (self.indices) is killed via POST /control/kill; once
    they all report HTTP 410 their underlying marathon application is either
    deleted outright (if every one of its tasks was killed) or trimmed task
    by task via POST /v2/tasks/delete.

    Results are reported via self.out ('down' -> killed pod indices,
    'ok' -> True on success).  Failures are logged and swallowed.
    """
    try:

        #
        # - we need to pass the framework master IPs around (ugly)
        #
        assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
        master = choice(os.environ['MARATHON_MASTER'].split(','))
        headers = \
            {
                'content-type': 'application/json',
                'accept': 'application/json'
            }

        #
        # - kill all (or part of) the pods using a POST /control/kill
        # - wait for them to be dead
        # - warning, /control/kill will block (hence the 5 seconds timeout)
        #
        @retry(timeout=self.timeout, pause=0)
        def _spin():
            def _query(zk):
                replies = fire(zk, self.cluster, 'control/kill', subset=self.indices, timeout=self.timeout)
                return [(code, seq) for seq, _, code in replies.values()]

            #
            # - fire the request to one or more pods
            # - wait for every pod to report back a HTTP 410 (GONE)
            # - this means the ochopod state-machine is now idling (e.g dead)
            #
            js = run(self.proxy, _query)
            gone = sum(1 for code, _ in js if code == 410)
            assert gone == len(js), 'at least one pod is still running'
            return [seq for _, seq in js]

        down = _spin()
        self.out['down'] = down
        assert down, 'the cluster is either invalid or empty'
        logger.debug('%s : %d dead pods -> %s' % (self.cluster, len(down), ', '.join(['#%d' % seq for seq in down])))

        #
        # - now peek and see what pods we have
        # - we want to know what the underlying marathon application & task are
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'info', subset=self.indices)
            return [(hints['application'], hints['task']) for _, hints, _ in replies.values()]

        # group the killed tasks by their owning marathon application
        js = run(self.proxy, _query)
        rollup = {key: [] for key in set([key for key, _ in js])}
        for app, task in js:
            rollup[app] += [task]

        #
        # - go through each application
        # - query it and check how many tasks it currently has
        # - the goal is to check if we should nuke the whole application or not
        #
        for app, tasks in rollup.items():

            url = 'http://%s/v2/apps/%s/tasks' % (master, app)
            reply = get(url, headers=headers)
            code = reply.status_code
            logger.debug('%s : -> %s (HTTP %d)' % (self.cluster, url, code))
            assert code == 200, 'task lookup failed (HTTP %d)' % code
            js = reply.json()
            if len(tasks) == len(js['tasks']):

                #
                # - all the containers running for that application were reported as dead
                # - issue a DELETE /v2/apps to nuke the whole thing
                #
                url = 'http://%s/v2/apps/%s' % (master, app)
                reply = delete(url, headers=headers)
                code = reply.status_code
                logger.debug('%s : -> %s (HTTP %d)' % (self.cluster, url, code))
                assert code == 200 or code == 204, 'application deletion failed (HTTP %d)' % code

            else:

                #
                # - we killed a subset of that application's pods
                # - cherry pick the underlying tasks and delete them at once using POST v2/tasks/delete
                #
                js = \
                    {
                        'ids': tasks
                    }
                url = 'http://%s/v2/tasks/delete?scale=true' % master
                reply = post(url, data=json.dumps(js), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code

        self.out['ok'] = True

    except AssertionError as failure:

        logger.debug('%s : failed to kill -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to kill -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Bump the pods in this cluster to a new docker image version via marathon.

    The flow: look up the single marathon application backing the cluster,
    fetch its latest configuration, kill every pod (POST /control/kill and
    wait for HTTP 410s), PUT an updated container spec with the new image
    tag, then wait until the expected number of pods is back up.

    Results are reported via self.out ('up' -> list of new pod sequence
    indices, 'ok' -> True on success).  Failures are logged and swallowed.
    """
    try:

        #
        # - we need to pass the framework master IPs around (ugly)
        #
        assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
        master = choice(os.environ['MARATHON_MASTER'].split(','))
        headers = \
            {
                'content-type': 'application/json',
                'accept': 'application/json'
            }

        #
        # - first peek and see what pods we have
        # - they should all map to one single marathon application (abort if not)
        # - we'll use the application identifier to retrieve the configuration json later on
        #
        def _query(zk):
            replies = fire(zk, self.cluster, 'info')
            return [hints['application'] for (_, hints, _) in replies.values()]

        js = run(self.proxy, _query)
        assert len(set(js)) == 1, '%s is mapping to 2+ marathon applications' % self.cluster
        app = js[0]

        #
        # - fetch the various versions for our app
        # - we want to get hold of the most recent configuration
        #
        url = 'http://%s/v2/apps/%s/versions' % (master, app)
        reply = get(url, headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))

        # fixed: this is a GET, the original message wrongly said 'delete failed'
        assert code == 200 or code == 201, 'version lookup failed (HTTP %d)' % code
        js = reply.json()

        #
        # - retrieve the latest one
        # - keep the docker container configuration and the # of tasks around
        #
        last = js['versions'][0]
        url = 'http://%s/v2/apps/%s/versions/%s' % (master, app, last)
        reply = get(url, headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))

        # fixed: same wrong 'delete failed' message as above
        assert code == 200 or code == 201, 'version lookup failed (HTTP %d)' % code
        js = reply.json()
        spec = js['container']
        tag = spec['docker']['image']
        capacity = js['instances']

        #
        # - kill all the pods using a POST /control/kill
        # - wait for them to be dead
        #
        @retry(timeout=self.timeout, pause=0)
        def _spin():
            def _query(zk):
                replies = fire(zk, self.cluster, 'control/kill', timeout=self.timeout)
                return [(code, seq) for seq, _, code in replies.values()]

            #
            # - fire the request to one or more pods
            # - wait for every pod to report back a HTTP 410 (GONE)
            # - this means the ochopod state-machine is now idling (e.g dead)
            #
            js = run(self.proxy, _query)
            gone = sum(1 for code, _ in js if code == 410)
            assert gone == len(js), 'at least one pod is still running'
            return

        _spin()

        #
        # - grab the docker image
        # - just add a :<version> suffix (or replace it) but don't change the image proper
        # - update the image and PUT the new configuration back
        # - marathon will then kill & re-start all the tasks
        #
        tokens = tag.split(':')
        spec['docker']['image'] = \
            '%s:%s' % (tag, self.version) if len(tokens) < 2 else '%s:%s' % (tokens[0], self.version)
        js = \
            {
                'container': spec
            }
        url = 'http://%s/v2/apps/%s' % (master, app)
        reply = put(url, data=json.dumps(js), headers=headers)
        code = reply.status_code
        logger.debug('-> %s (HTTP %d)' % (url, code))
        logger.debug(reply.text)
        assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

        #
        # - the pods should now be starting
        # - wait for all the pods to be in the 'running' mode (they are 'dead' right now)
        # - the sequence counters allocated to our new pods are returned as well
        #
        target = ['running'] if self.strict else ['stopped', 'running']

        @retry(timeout=self.timeout, pause=3, default={})
        def _spin():
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [(hints['process'], seq) for seq, hints, _ in replies.values() if hints['process'] in target]

            js = run(self.proxy, _query)
            assert len(js) == capacity, 'not all pods running yet'
            return js

        js = _spin()
        up = [seq for _, seq in js]
        assert len(up) == capacity, '1+ pods still not up (%d/%d)' % (len(up), capacity)
        self.out['up'] = up
        self.out['ok'] = True
        logger.debug('%s : %d pods updated to version "%s"' % (self.cluster, capacity, self.version))

    except AssertionError as failure:

        logger.debug('%s : failed to bump -> %s' % (self.cluster, failure))

    except Exception as failure:

        logger.debug('%s : failed to bump -> %s' % (self.cluster, diagnostic(failure)))
def run(self):
    """Kill the pods in this cluster by deleting their kubernetes resources.

    For each distinct replication controller referenced by the pods (the
    'application' hint) an HTTP DELETE is issued against the master API,
    then each individual pod (the 'task' hint) is deleted as well.

    On success self.killed holds the pod count and self.ok is set to 1.
    Failures are logged and swallowed.
    """
    try:

        #
        # - workaround to fetch the master IP and credentials as there does not seem to
        #   be a way to use 10.0.0.2 from within the pod
        #
        assert 'KUBERNETES_MASTER' in os.environ, '$KUBERNETES_MASTER not specified (check your portal pod)'
        assert 'KUBERNETES_USER' in os.environ, '$KUBERNETES_USER not specified (check your portal pod)'
        assert 'KUBERNETES_PWD' in os.environ, '$KUBERNETES_PWD not specified (check your portal pod)'
        auth = HTTPBasicAuth(os.environ['KUBERNETES_USER'], os.environ['KUBERNETES_PWD'])

        def _query(zk):
            replies = fire(zk, self.cluster, 'info')
            return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

        #
        # - each pod refers to its controller via the 'application' hint
        #
        total, js = run(self.proxy, _query)
        assert total == len(js), 'failure to communicate with one or more pods'
        for key in set([hints['application'] for hints in js.values()]):

            #
            # - HTTP DELETE the controller via the master API
            #
            url = 'https://%s/api/v1beta3/namespaces/default/replicationcontrollers/%s' % (os.environ['KUBERNETES_MASTER'], key)
            reply = requests.delete(url, auth=auth, verify=False)
            code = reply.status_code
            logger.debug('-> DELETE %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'replication controller deletion failed (HTTP %d)' % code

        #
        # - the 'task' hint is the pod's identifier
        #
        for key, hints in js.items():

            #
            # - HTTP DELETE the pod via the master API
            #
            url = 'https://%s/api/v1beta3/namespaces/default/pods/%s' % (os.environ['KUBERNETES_MASTER'], hints['task'])
            reply = requests.delete(url, auth=auth, verify=False)
            code = reply.status_code
            logger.debug('-> DELETE %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'pod deletion failed (HTTP %d)' % code

        self.killed = len(js)
        self.ok = 1

    except AssertionError as failure:

        # fixed: the original message wrongly said 'failed to deploy' (copy/paste)
        logger.debug('%s : failed to kill -> %s' % (self.cluster, failure))

    except YAMLError as failure:

        #
        # - NOTE(review): no YAML is parsed in this method, this handler looks
        #   copy/pasted from the deploy tool — kept as-is to avoid altering the
        #   exception contract; confirm and remove if truly dead
        #
        if hasattr(failure, 'problem_mark'):
            mark = failure.problem_mark
            logger.debug('%s : invalid deploy.yml (line %s, column %s)' % (self.cluster, mark.line+1, mark.column+1))

    except Exception as failure:

        # fixed: the original message wrongly said 'failed to deploy' (copy/paste)
        logger.debug('%s : failed to kill -> %s' % (self.cluster, diagnostic(failure)))