コード例 #1
0
ファイル: reset.py プロジェクト: nicbet/ochothon
    def run(self):
        """Switch the targeted pods off and then back on, recording the outcome in self.out.

        On success self.out['reset'] holds the values collected from the
        'control/off' replies whose code is 200 and self.out['ok'] is True.
        self.out is only written once both requests succeeded; failures are
        logged at debug level and not re-raised.
        """
        try:

            def _acked(replies):
                #
                # - keep the first tuple element of every reply carrying a 200
                #
                return [seq for _, (seq, _, code) in replies.items() if code == 200]

            #
            # - first turn off the pods
            # - keep track of the indices
            #
            def _off(zk):
                return _acked(fire(zk, self.cluster, 'control/off', subset=self.indices))

            pods = run(self.proxy, _off)

            #
            # - then turn those pods back on
            # - the request is fired outside of any assert statement: asserts are
            #   stripped under python -O and the 'control/on' request would then
            #   never be sent
            #
            def _on(zk):
                return _acked(fire(zk, self.cluster, 'control/on', subset=pods))

            restarted = run(self.proxy, _on)
            if pods != restarted:
                raise AssertionError('one or more pods failed to switch back on')

            self.out['reset'] = pods
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
コード例 #2
0
ファイル: reset.py プロジェクト: lmok/ochothon
    def run(self):
        """Switch the targeted pods off, reset them and switch them back on.

        Three requests ('control/off', 'reset', 'control/on') are fired in that
        order at self.subset. The values collected from the 200 replies of the
        first request must match those of the two following ones. On success
        self.out['reset'] holds that list and self.out['ok'] is True; failures
        are logged at debug level and not re-raised.
        """
        try:

            def _acked(verb):
                #
                # - fire the request and list who replied with a 200
                #
                def _query(zk):
                    replies = fire(zk, self.cluster, verb, subset=self.subset)
                    return [seq for _, (seq, _, code) in replies.items() if code == 200]

                return run(self.proxy, _query)

            js = _acked('control/off')

            #
            # - the follow-up requests are fired outside of any assert statement
            # - asserts are stripped under python -O, in which case the 'reset' and
            #   'control/on' requests would never be sent
            #
            if js != _acked('reset'):
                raise AssertionError('one or more pods did not respond')

            if js != _acked('control/on'):
                raise AssertionError('one or more pods did not respond')

            self.out['reset'] = js
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
コード例 #3
0
ファイル: reset.py プロジェクト: a3linux/ochothon
    def run(self):
        """Switch the targeted pods off, reset them and switch them back on.

        The 'control/off', 'reset' and 'control/on' requests are fired in that
        order at self.indices. Each follow-up request must be acknowledged (code
        200) by the same list as the first one. On success self.out['reset']
        holds that list and self.out['ok'] is True; failures are logged at
        debug level and not re-raised.
        """
        try:

            def _acked(replies):
                #
                # - keep the first tuple element of every reply carrying a 200
                #
                return [
                    seq for _, (seq, _, code) in replies.items() if code == 200
                ]

            #
            # - first turn the pod off
            #
            def _off(zk):
                return _acked(
                    fire(zk, self.cluster, 'control/off', subset=self.indices))

            js = run(self.proxy, _off)

            #
            # - reset it
            # - this will force a reconnection to zookeeper
            # - the request is fired outside of any assert statement since asserts
            #   are stripped under python -O (the request would then be skipped)
            #
            def _reset(zk):
                return _acked(
                    fire(zk, self.cluster, 'reset', subset=self.indices))

            if js != run(self.proxy, _reset):
                raise AssertionError('one or more pods did not respond')

            #
            # - then turn the pod back on (same remark regarding asserts)
            #
            def _on(zk):
                return _acked(
                    fire(zk, self.cluster, 'control/on', subset=self.indices))

            if js != run(self.proxy, _on):
                raise AssertionError('one or more pods did not respond')

            self.out['reset'] = js
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to reset -> %s' %
                         (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to reset -> %s' %
                         (self.cluster, diagnostic(failure)))
コード例 #4
0
    def run(self):
        """Switch the targeted pods off and then back on, honoring self.timeout.

        On success self.out['reset'] holds the values collected from the
        'control/off' replies whose code is 200 and self.out['ok'] is True.
        self.out is only written once both requests succeeded; failures are
        logged at debug level and not re-raised.
        """
        try:

            def _acked(verb, subset):
                #
                # - fire the request at the subset and list who replied with a 200
                #
                def _query(zk):
                    replies = fire(zk,
                                   self.cluster,
                                   verb,
                                   subset=subset,
                                   timeout=self.timeout)
                    return [
                        seq for _, (seq, _, code) in replies.items()
                        if code == 200
                    ]

                return run(self.proxy, _query)

            #
            # - first turn off the pods
            # - keep track of the indices
            #
            pods = _acked('control/off', self.indices)

            #
            # - then turn those pods back on
            # - the request is fired outside of any assert statement: asserts are
            #   stripped under python -O and the 'control/on' request would then
            #   never be sent
            #
            restarted = _acked('control/on', pods)
            if pods != restarted:
                raise AssertionError(
                    'one or more pods failed to switch back on')

            self.out['reset'] = pods
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to reset -> %s' %
                         (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to reset -> %s' %
                         (self.cluster, diagnostic(failure)))
コード例 #5
0
ファイル: exec.py プロジェクト: autodesk-cloud/ochothon
        def body(self, args, unknown, proxy):
            """Fire an 'exec' request at the first cluster and report what each pod replied.

            The command line (plus any unknown trailing arguments) is passed in the
            X-Shell header. Any token naming an existing local file has its content
            attached to the request. Returns 0 when every reply carried a 200 and
            1 otherwise.
            """
            assert args.force or args.indices, 'you must specify --force if -i is not set'

            if unknown is not None:
                args.cmdline += unknown

            #
            # - the header is built from the final command line
            # - attach the content of every token that happens to be a file
            #
            headers = {'X-Shell': ' '.join(args.cmdline)}
            files = {}
            for token in args.cmdline:
                if not path.isfile(token):
                    continue

                with open(token, 'rb') as handle:
                    files[token] = handle.read()

            def _query(zk):
                replies = fire(zk, args.clusters[0], 'exec', subset=args.indices, headers=headers, files=files, timeout=args.timeout)
                succeeded = dict((key, out) for key, (_, out, code) in replies.items() if code == 200)
                return len(replies), succeeded

            total, outputs = run(proxy, _query)
            pct = ((len(outputs) * 100) / total) if total else 0
            if args.json:
                logger.info(json.dumps(outputs))

            elif outputs:
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters[0], pct, len(outputs)))
                for key, log in outputs.items():

                    #
                    # - only append the captured stdout if there is any
                    #
                    if log['stdout']:
                        suffix = '\n\n %s\n' % '\n '.join(log['stdout'])
                    else:
                        suffix = ''

                    logger.info('- %s (exit code %d)%s' % (key, log['code'], suffix))

            if pct == 100:
                return 0

            return 1
コード例 #6
0
        def body(self, args, unknown, proxy):
            """Display, for each pod exposing the requested port, its IPs and the mapped value.

            An 'info' request is fired at args.clusters. Only replies with a 200
            whose hints['ports'] contains the requested port are kept. The result
            is logged either as JSON or as a column-aligned table. Always returns 0.
            """
            port = str(args.port[0])

            def _query(zk):
                replies = fire(zk, args.clusters, 'info')
                table = []
                for key, (_, hints, code) in sorted(replies.items()):
                    if code == 200 and port in hints['ports']:
                        table.append([key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])])

                return len(replies), table

            total, table = run(proxy, _query)
            pct = (len(table) * 100) / total if total else 0
            if args.json:
                out = {}
                for item in table:
                    out[item[0]] = {'ip': item[2], 'public': item[4], 'ports': item[6]}

                logger.info(json.dumps(out))

            elif table:

                #
                # - pad each cell to the width of the widest entry in its column
                #
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(table)))
                header = ['pod', '|', 'pod IP', '|', 'public IP', '|', 'port']
                spacer = ['', '|', '', '|', '', '|', '']
                rows = [header, spacer] + table
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            return 0
コード例 #7
0
ファイル: grep.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Display ip, node, process and state for the pods of each requested cluster.

            An 'info' request is fired per cluster token. Replies carrying a 200
            are either printed as a column-aligned table (one per cluster) or,
            when args.json is set, merged and logged once as a JSON string.
            """
            header = ['pod', '|', 'pod IP', '|', 'node', '|', 'process', '|', 'state']
            spacer = ['', '|', '', '|', '', '|', '', '|', '']
            outs = {}

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'info')
                    lines = []
                    for key, (_, hints, code) in sorted(replies.items()):
                        if code == 200:
                            lines.append([key, '|', hints['ip'], '|', hints['node'], '|', hints['process'], '|', hints['state']])

                    return len(replies), lines

                total, js = run(proxy, _query)

                for item in js:
                    outs[item[0]] = {'ip': item[2], 'node': item[4], 'process': item[6], 'state': item[8]}

                if args.json or not js:
                    continue

                #
                # - pad each cell to the width of the widest entry in its column
                #
                pct = (len(js) * 100) / total
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (token, pct, len(js)))
                rows = [header, spacer] + js
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            if args.json:

                logger.info(json.dumps(outs))
コード例 #8
0
ファイル: port.py プロジェクト: nicbet/ochothon
        def body(self, args, unknown, proxy):
            """Report the pods exposing a given port along with their IPs.

            Fires an 'info' request at args.clusters and keeps the replies with a
            200 whose hints['ports'] contains the requested port. Output is a JSON
            string when args.json is set, a column-aligned table otherwise.
            Always returns 0.
            """
            port = str(args.port[0])

            def _query(zk):
                replies = fire(zk, args.clusters, 'info')

                def _exposed(entry):
                    _, (_, hints, code) = entry
                    return code == 200 and port in hints['ports']

                matching = [entry for entry in sorted(replies.items()) if _exposed(entry)]
                return len(replies), [[key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])]
                                      for key, (_, hints, _) in matching]

            total, found = run(proxy, _query)
            if total:
                pct = (len(found) * 100) / total
            else:
                pct = 0

            if args.json:
                out = dict((item[0], {'ip': item[2], 'public': item[4], 'ports': item[6]}) for item in found)
                logger.info(json.dumps(out))

            elif found:

                #
                # - lay the rows out as left-justified columns
                #
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(found)))
                rows = [['pod', '|', 'pod IP', '|', 'public IP', '|', 'TCP'], ['', '|', '', '|', '', '|', '']]
                rows.extend(found)
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    logger.info('  '.join([val.ljust(width) for val, width in zip(row, widths)]))

            return 0
コード例 #9
0
    def run(self):
        """Fire a 'control/on' request at the targeted pods and record who acknowledged.

        On success (every reply carries a 200) self.out['on'] holds the reply
        keys and self.out['ok'] is True. Failures are logged at debug level and
        not re-raised.
        """
        try:

            def _query(zk):
                replies = fire(zk,
                               self.cluster,
                               'control/on',
                               subset=self.indices,
                               timeout=self.timeout)
                return len(replies), [
                    seq for seq, (_, _, code) in replies.items() if code == 200
                ]

            #
            # - every single reply must be a 200
            # - the message used to read 'failed to stop', which did not match
            #   the 'control/on' request fired above
            #
            total, js = run(self.proxy, _query)
            assert len(js) == total, '1 or more pod failed to switch on'

            self.out['on'] = js
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to switch on -> %s' %
                         (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to switch on -> %s' %
                         (self.cluster, diagnostic(failure)))
コード例 #10
0
ファイル: grep.py プロジェクト: autodesk-cloud/ochonetes
        def body(self, args, proxy):
            """Print a table with ip, process and state for the pods of each cluster.

            An 'info' request is fired per cluster token; replies carrying a 200
            become table rows. A cluster with no reply at all is reported as
            having no pods.
            """
            header = ['cluster', '|', 'pod IP', '|', 'process', '|', 'state']
            spacer = ['', '|', '', '|', '', '|', '']

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'info')
                    lines = []
                    for key, (_, hints, code) in replies.items():
                        if code == 200:
                            lines.append([key, '|', hints['ip'], '|', hints['process'], '|', hints['state']])

                    return len(replies), lines

                total, js = run(proxy, _query)
                if not total:

                    logger.info('\n<%s> -> no pods found' % token)
                    continue

                #
                # - pad each cell to the width of the widest entry in its column
                #
                pct = (len(js) * 100) / total
                logger.info('\n<%s> -> %d%% replies (%d pods total) ->\n' % (token, pct, total))
                rows = [header, spacer] + js
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info("  ".join(padded))
コード例 #11
0
ファイル: poll.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Collect the 'metrics' entry reported by the pods of each requested cluster.

            An 'info' request is fired per cluster token. Replies carrying a 200
            and a 'metrics' key are kept. They are printed as a table per cluster
            or, when args.json is set, merged and logged once as a JSON string.
            """
            outs = {}

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'info')
                    metrics = {}
                    for key, (index, hints, code) in replies.items():
                        if code == 200 and 'metrics' in hints:
                            metrics[key] = hints['metrics']

                    return len(replies), metrics

                total, js = run(proxy, _query)

                outs.update(js)

                #
                # - the table is only displayed if a json string is not asked for
                #
                if args.json or not js:
                    continue

                pct = (len(js) * 100) / total
                logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
                lines = sorted([[key, '|', json.dumps(val)] for key, val in js.iteritems()])
                rows = [['pod', '|', 'metrics'], ['', '|', '']] + lines
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            if args.json:

                logger.info(json.dumps(outs))
コード例 #12
0
ファイル: poll.py プロジェクト: khanchan/ochothon
        def body(self, args, _, proxy):
            """Display the 'metrics' entry reported by the pods matching args.clusters.

            Fires one 'info' request and keeps the replies carrying a 200 and a
            'metrics' key. Output is a JSON string when args.json is set, a
            column-aligned table otherwise. Always returns 0.
            """
            def _query(zk):
                replies = fire(zk, args.clusters, 'info')
                metrics = {}
                for key, (index, hints, code) in replies.items():
                    if code == 200 and 'metrics' in hints:
                        metrics[key] = hints['metrics']

                return len(replies), metrics

            total, js = run(proxy, _query)
            if total:
                pct = (len(js) * 100) / total
            else:
                pct = 0

            if args.json:
                logger.info(json.dumps(js))

            elif js:

                #
                # - one sorted line per pod, left-justified in columns
                #
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' %
                            (args.clusters, pct, len(js)))
                lines = sorted(
                    [[key, '|', json.dumps(val)] for key, val in js.iteritems()])
                rows = [['pod', '|', 'metrics'], ['', '|', '']] + lines
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            return 0
コード例 #13
0
ファイル: ping.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Send the content of a YAML file as a 'control/signal' request to each cluster.

            The file named by args.yaml[0] is parsed and its content is passed,
            JSON-serialized, to every cluster token. Replies carrying a 200 are
            merged and reported either as a JSON string or as a one-line summary.

            An IOError is reported via the logger. A YAML parsing failure raises
            an AssertionError naming the file (and the offending location when
            the parser provides one).
            """
            try:
                with open(args.yaml[0], 'r') as f:

                    #
                    # - NOTE(review): yaml.load() is able to instantiate arbitrary
                    #   python objects, consider yaml.safe_load() if the file may
                    #   not be trusted
                    #
                    payload = yaml.load(f)

                total = 0
                merged = {}
                for token in args.clusters:

                    def _query(zk):
                        replies = fire(zk, token, 'control/signal', js=json.dumps(payload))
                        return len(replies), {key: data for key, (_, data, code) in replies.items() if code == 200}

                    pods, js = run(proxy, _query)
                    merged.update(js)
                    total += pods

                pct = (len(merged) * 100) / total if total else 0
                logger.info(json.dumps(merged) if args.json else '%d%% replies, pinged %d pods' % (pct, len(merged)))

            except IOError:

                logger.info('unable to load %s' % args.yaml[0])

            except YAMLError as failure:

                #
                # - report the file name itself (args.yaml is a list)
                # - the mark may be missing or unset, in which case the failure is
                #   still reported instead of being silently dropped
                #
                mark = getattr(failure, 'problem_mark', None)
                if mark is not None:
                    assert 0, '%s is invalid (line %s, column %s)' % (args.yaml[0], mark.line+1, mark.column+1)

                assert 0, '%s is invalid' % args.yaml[0]
コード例 #14
0
ファイル: info.py プロジェクト: trb116/pythonanalyzer
        def body(self, args, proxy):
            """Dump the hints returned by the pods of each requested cluster.

            An 'info' request is fired per cluster token. The hints of every reply
            carrying a 200 are pretty-printed as indented JSON, sorted by key.
            A cluster with no reply at all is reported as having no pods.
            """
            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'info')
                    kept = {}
                    for key, (_, hints, code) in replies.items():
                        if code == 200:
                            kept[key] = hints

                    return len(replies), kept

                total, js = run(proxy, _query)
                if not total:

                    logger.info('\n<%s> -> no pods found' % token)
                    continue

                #
                # - one indented json block per pod
                #
                pct = (len(js) * 100) / total
                unrolled = []
                for k in sorted(js.keys()):
                    dumped = json.dumps(js[k], indent=4, separators=(',', ': '))
                    unrolled.append('%s\n%s\n' % (k, dumped))

                logger.info(
                    '\n<%s> -> %d%% replies (%d pods total) ->\n\n- %s' %
                    (token, pct, total, '\n- '.join(unrolled)))
コード例 #15
0
ファイル: bump.py プロジェクト: autodesk-cloud/ochothon
            def _spin():
                #
                # - fire an 'info' request and list (process, seq) for each reply
                #   whose process hint is in the target set
                # - fail via an assert until exactly 'capacity' entries are found
                #
                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    found = []
                    for seq, hints, _ in replies.values():
                        if hints['process'] in target:
                            found.append((hints['process'], seq))

                    return found

                matching = run(self.proxy, _query)
                assert len(matching) == capacity, 'not all pods running yet'
                return matching
コード例 #16
0
                def _spin():
                    #
                    # - fire an 'info' request and collect the first element of
                    #   every reply
                    # - fail via an assert until exactly 'target' entries are found
                    #
                    def _query(zk):
                        replies = fire(zk, self.cluster, 'info')
                        indices = []
                        for seq, _, _ in replies.values():
                            indices.append(seq)

                        return indices

                    indices = run(self.proxy, _query)
                    assert len(indices) == target, 'not all pods running yet'
                    return indices
コード例 #17
0
ファイル: scale.py プロジェクト: a3linux/ochothon
                def _spin():
                    #
                    # - list the first tuple element of each 'info' reply
                    # - the assert trips as long as the count differs from 'target'
                    #
                    def _query(zk):
                        return [reply[0] for reply in
                                ((seq, hints, code) for seq, hints, code in fire(zk, self.cluster, 'info').values())]

                    seqs = run(self.proxy, _query)
                    assert len(seqs) == target, 'not all pods running yet'
                    return seqs
コード例 #18
0
ファイル: deploy.py プロジェクト: a3linux/ochothon
                def _spin():
                    #
                    # - fire an 'info' request and list (process, seq) for each
                    #   reply matching both the application and the target
                    #   process states
                    # - fail via an assert until self.pods entries are found
                    #
                    def _query(zk):
                        replies = fire(zk, qualified, 'info')
                        found = []
                        for seq, hints, _ in replies.values():
                            if hints['application'] == application and hints['process'] in target:
                                found.append((hints['process'], seq))

                        return found

                    matching = run(self.proxy, _query)
                    assert len(matching) == self.pods, 'not all pods running yet'
                    return matching
コード例 #19
0
            def _spin():
                #
                # - query 'info' and keep (process, seq) for the replies whose
                #   process hint belongs to the target set
                # - the assert trips as long as the count differs from 'capacity'
                #
                def _in_target(reply):
                    _, hints, _ = reply
                    return hints['process'] in target

                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    kept = [reply for reply in replies.values() if _in_target(reply)]
                    return [(hints['process'], seq) for seq, hints, _ in kept]

                pods = run(self.proxy, _query)
                assert len(pods) == capacity, 'not all pods running yet'
                return pods
コード例 #20
0
ファイル: deploy.py プロジェクト: a3linux/ochothon
                def _spin():
                    #
                    # - query 'info' and keep (process, seq) for the replies
                    #   matching the application whose process hint is in the
                    #   target set
                    # - the assert trips as long as the count differs from
                    #   self.pods
                    #
                    def _wanted(hints):
                        if hints['application'] != application:
                            return False

                        return hints['process'] in target

                    def _query(zk):
                        replies = fire(zk, qualified, 'info')
                        return [(hints['process'], seq)
                                for seq, hints, _ in replies.values()
                                if _wanted(hints)]

                    pods = run(self.proxy, _query)
                    assert len(pods) == self.pods, 'not all pods running yet'
                    return pods
コード例 #21
0
ファイル: reset.py プロジェクト: a3linux/ochothon
    def run(self):
        """Switch the targeted pods off, reset them and switch them back on.

        The 'control/off', 'reset' and 'control/on' requests are fired in that
        order at self.indices. Each follow-up request must be acknowledged (code
        200) by the same list as the first one. On success self.out['reset']
        holds that list and self.out['ok'] is True; failures are logged at
        debug level and not re-raised.
        """
        try:

            def _acked(verb):
                #
                # - fire the request and list who replied with a 200
                #
                def _query(zk):
                    replies = fire(zk, self.cluster, verb, subset=self.indices)
                    return [seq for _, (seq, _, code) in replies.items() if code == 200]

                return run(self.proxy, _query)

            #
            # - first turn the pod off
            #
            js = _acked('control/off')

            #
            # - reset it
            # - this will force a reconnection to zookeeper
            # - the request is fired outside of any assert statement since asserts
            #   are stripped under python -O (the request would then be skipped)
            #
            if js != _acked('reset'):
                raise AssertionError('one or more pods did not respond')

            #
            # - then turn the pod back on (same remark regarding asserts)
            #
            if js != _acked('control/on'):
                raise AssertionError('one or more pods did not respond')

            self.out['reset'] = js
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to reset -> %s' % (self.cluster, diagnostic(failure)))
コード例 #22
0
ファイル: log.py プロジェクト: autodesk-cloud/ochonetes
        def body(self, args, proxy):
            """Display the tail of the log returned by each pod of the requested clusters.

            A 'log' request is fired per cluster token. For every reply carrying
            a 200 the last 64 log entries are printed, pods being sorted by key.
            """
            for token in args.clusters:

                def _query(zk):
                    return fire(zk, token, 'log')

                replies = run(proxy, _query)
                for pod, (_, log, code) in sorted(replies.items()):
                    if code != 200:
                        continue

                    tail = log[-64:]
                    logger.info('\n%s ->\n\n- %s' % (pod, '- '.join(tail)))
コード例 #23
0
ファイル: on.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Fire a 'control/on' request at each requested cluster and report the outcome.

            For each cluster token the number of replies carrying a 200 is
            logged along with the reply percentage. Nothing is logged for a
            cluster without any such reply.
            """
            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'control/on', subset=args.indices)
                    switched = []
                    for pod, (_, _, code) in replies.items():
                        if code == 200:
                            switched.append(pod)

                    return len(replies), switched

                total, switched = run(proxy, _query)
                if not switched:
                    continue

                pct = (len(switched) * 100) / total
                logger.info('<%s> -> %d%% replies, %d pods on' % (token, pct, len(switched)))
コード例 #24
0
ファイル: scale.py プロジェクト: pferrot/ochothon
                def _spin():
                    #
                    # - fire an 'info' request and list (seq, application, task)
                    #   for each reply
                    # - when a group is set only the entries whose application
                    #   equals 'app' are counted
                    # - fail via an assert until the count matches 'target'
                    #
                    def _query(zk):
                        replies = fire(zk, self.cluster, 'info')
                        found = []
                        for (seq, hints, _) in replies.values():
                            found.append((seq, hints['application'], hints['task']))

                        return found

                    pods = run(self.proxy, _query)
                    if self.group is None:
                        nb_pods = len(pods)
                    else:
                        nb_pods = len([key for (_, key, _) in pods if key == app])

                    assert nb_pods == target, 'not all pods running yet'
                    return pods
コード例 #25
0
ファイル: ls.py プロジェクト: autodesk-cloud/ochonetes
        def body(self, args, proxy):
            """List the pods whose process hint is 'running' across all clusters.

            An 'info' request is fired with the '*' pattern. The summary reports
            the number of pods that replied with a 200, the percentage of those
            that are running and their sorted keys.
            """
            def _query(zk):
                responses = fire(zk, '*', 'info')
                states = {}
                for key, (_, hints, code) in responses.items():
                    if code == 200:
                        states[key] = hints['process']

                return states

            states = run(proxy, _query)
            if states:
                running = [pod for pod, state in states.items() if state == 'running']
                pct = int((100 * len(running)) / len(states))
                listing = '\n - '.join(sorted(running))
                logger.info('\n%d pods, %d%% running ->\n - %s' % (len(states), pct, listing))
            else:
                logger.info('\n0 pods')
コード例 #26
0
        def body(self, args, proxy):
            """Print the last 64 log entries of each pod in the requested clusters.

            A 'log' request is fired per cluster token. Pods are walked in sorted
            key order and only the replies carrying a 200 are displayed.
            """
            for token in args.clusters:

                replies = run(proxy, lambda zk: fire(zk, token, 'log'))
                for pod in sorted(replies.keys()):
                    _, log, code = replies[pod]
                    if code != 200:
                        continue

                    logger.info('\n%s ->\n\n- %s' %
                                (pod, '- '.join(log[-64:])))
コード例 #27
0
        def body(self, args, proxy):
            """Display the log returned by the targeted pods of each requested cluster.

            A 'log' request is fired per cluster token at args.indices. For each
            reply carrying a 200 either the whole log (args.long) or its last 16
            entries are printed. Nothing is logged for a cluster without any
            such reply.
            """
            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'log', subset=args.indices)
                    logs = {}
                    for key, (_, log, code) in replies.items():
                        if code == 200:
                            logs[key] = log

                    return len(replies), logs

                total, logs = run(proxy, _query)
                if not logs:
                    continue

                pct = ((len(logs) * 100) / total)
                unrolled = []
                for key, log in logs.items():
                    lines = log if args.long else log[-16:]
                    unrolled.append('- %s\n\n  %s' % (key, '  '.join(lines)))

                logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (token, pct, len(logs), '\n'.join(unrolled)))
コード例 #28
0
ファイル: log.py プロジェクト: UncleBarney/ochothon
        def body(self, args, proxy):
            """Print the pod logs for each requested cluster.

            Each cluster token gets a 'log' request restricted to args.indices.
            Replies carrying a 200 are displayed in full when args.long is set
            and truncated to their last 16 entries otherwise.
            """
            def _excerpt(log):
                #
                # - whole log or just its tail depending on the --long switch
                #
                if args.long:
                    return log

                return log[-16:]

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'log', subset=args.indices)
                    kept = dict((key, log) for key, (_, log, code) in replies.items() if code == 200)
                    return len(replies), kept

                total, js = run(proxy, _query)
                if js:
                    pct = ((len(js) * 100) / total)
                    unrolled = ['- %s\n\n  %s' % (key, '  '.join(_excerpt(log))) for key, log in js.items()]
                    summary = '<%s> -> %d%% replies (%d pods total) ->\n%s'
                    logger.info(summary % (token, pct, len(js), '\n'.join(unrolled)))
コード例 #29
0
ファイル: kill.py プロジェクト: khanchan/ochothon
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'control/kill', subset=self.indices, timeout=self.timeout)
                    outcome = []
                    for seq, _, code in replies.values():
                        outcome.append((code, seq))

                    return outcome

                #
                # - fire the request at one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                outcome = run(self.proxy, _query)
                assert all(code == 410 for code, _ in outcome), 'at least one pod is still running'
                return [seq for _, seq in outcome]
コード例 #30
0
ファイル: off.py プロジェクト: lmok/ochothon
        def body(self, args, proxy):
            """Fire a 'control/off' request at each requested cluster and report the outcome.

            Either --force or a subset is required. For each cluster token the
            number of replies carrying a 200 is logged along with the reply
            percentage. Nothing is logged for a cluster without any such reply.
            """
            assert args.force or args.subset, 'you must specify --force if -i is not set'

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'control/off', subset=args.subset)
                    stopped = []
                    for pod, (_, _, code) in replies.items():
                        if code == 200:
                            stopped.append(pod)

                    return len(replies), stopped

                total, stopped = run(proxy, _query)
                if not stopped:
                    continue

                pct = (len(stopped) * 100) / total
                logger.info('<%s> -> %d%% replies, %d pods off' % (token, pct, len(stopped)))
コード例 #31
0
ファイル: bump.py プロジェクト: autodesk-cloud/ochothon
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'control/kill', timeout=self.timeout)
                    outcome = []
                    for seq, _, code in replies.values():
                        outcome.append((code, seq))

                    return outcome

                #
                # - fire the request at one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                outcome = run(self.proxy, _query)
                assert all(code == 410 for code, _ in outcome), 'at least one pod is still running'
                return
コード例 #32
0
ファイル: off.py プロジェクト: trb116/pythonanalyzer
        def body(self, args, proxy):
            """Fire a 'control/off' request at each requested cluster and report the outcome.

            A cluster with no reply at all is reported as having no pods.
            Otherwise the number of replies carrying a 200 is logged along with
            the reply percentage.
            """
            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'control/off')
                    stopped = []
                    for pod, (_, hints, code) in replies.items():
                        if code == 200:
                            stopped.append(pod)

                    return len(replies), stopped

                total, stopped = run(proxy, _query)
                if total:
                    pct = (len(stopped) * 100) / total
                    logger.info('\n<%s> -> %d%% replies, %d pods off' % (token, pct, len(stopped)))

                else:
                    logger.info("\n<%s> -> no pods found" % token)
コード例 #33
0
ファイル: grep.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Report ip, node, process and state for the pods of each requested cluster.

            One 'info' request is fired per cluster token and the replies
            carrying a 200 are kept, sorted by key. They are printed as one table
            per cluster unless args.json is set, in which case everything is
            merged and logged once as a JSON string.
            """
            fields = ('ip', 'node', 'process', 'state')
            outs = {}

            def _table(token, total, js):
                #
                # - pad each cell to the width of the widest entry in its column
                #
                pct = (len(js) * 100) / total
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' %
                            (token, pct, len(js)))
                rows = [['pod', '|', 'pod IP', '|', 'node', '|', 'process', '|', 'state'],
                        ['', '|', '', '|', '', '|', '', '|', '']] + js
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            for token in args.clusters:

                def _query(zk):
                    replies = fire(zk, token, 'info')
                    lines = []
                    for key, (_, hints, code) in sorted(replies.items()):
                        if code == 200:
                            line = [key]
                            for field in fields:
                                line += ['|', hints[field]]

                            lines.append(line)

                    return len(replies), lines

                total, js = run(proxy, _query)

                for item in js:
                    outs[item[0]] = {
                        'ip': item[2],
                        'node': item[4],
                        'process': item[6],
                        'state': item[8]
                    }

                if js and not args.json:
                    _table(token, total, js)

            if args.json:

                logger.info(json.dumps(outs))
コード例 #34
0
        def body(self, args, _, proxy):
            """Summarize, per cluster, how many pods are running and their status.

            An 'info' request is fired with the '*' pattern. Replies carrying a
            200 are grouped using the part of their key before the first space.
            Each group tracks its total, its number of 'running' processes and
            the last non-empty status seen. Output is a JSON string when
            args.json is set, a column-aligned table otherwise. Always returns 0.
            """
            def _query(zk):
                replies = fire(zk, '*', 'info')
                kept = {}
                for key, (_, hints, code) in replies.items():
                    if code == 200:
                        kept[key] = hints

                return len(replies), kept

            out = {}
            total, js = run(proxy, _query)
            pct = ((len(js) * 100) / total) if total else 0
            for key, hints in js.items():

                #
                # - create the group summary the first time it is seen
                #
                qualified = key.split(' ')[0]
                if qualified not in out:
                    out[qualified] = {'total': 0, 'running': 0, 'status': ''}

                item = out[qualified]
                item['total'] += 1
                if hints['process'] == 'running':
                    item['running'] += 1

                if 'status' in hints and hints['status']:
                    item['status'] = hints['status']

            if args.json:
                logger.info(json.dumps(out))

            elif js:
                logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
                unrolled = []
                for key, item in sorted(out.items()):
                    ratio = '%d/%d' % (item['running'], item['total'])
                    unrolled.append([key, '|', ratio, '|', item['status']])

                rows = [['cluster', '|', 'ok', '|', 'status'],
                        ['', '|', '', '|', '']] + unrolled
                widths = [max(len(cell) for cell in col) for col in zip(*rows)]
                for row in rows:
                    padded = [val.ljust(width) for val, width in zip(row, widths)]
                    logger.info('  '.join(padded))

            return 0
コード例 #35
0
ファイル: log.py プロジェクト: autodesk-cloud/ochothon
        def body(self, args, _, proxy):
            """Log the pod logs retrieved from the selected cluster(s).

            Fires a 'log' request (restricted to args.indices) and prints, for
            every pod that replied with a HTTP 200, either its whole log
            (args.long) or its last 16 lines.

            :param args: parsed command line (clusters, indices, long)
            :param _: ignored
            :param proxy: passed to run() together with the query closure
            :return: always 0
            """
            def _query(zk):
                #
                # - raw reply count plus the log of each pod that replied with a HTTP 200
                #
                replies = fire(zk, args.clusters, 'log', subset=args.indices)
                logs = {}
                for key, (_, log, code) in replies.items():
                    if code == 200:
                        logs[key] = log
                return len(replies), logs

            total, js = run(proxy, _query)
            pct = ((len(js) * 100) / total) if total else 0
            if not js:
                return 0

            #
            # - justify & format the whole thing
            #
            chunks = []
            for key, lines in js.items():
                kept = lines if args.long else lines[-16:]
                chunks.append('- %s\n\n  %s' % (key, '  '.join(kept)))

            logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (args.clusters, pct, len(js), '\n'.join(chunks)))
            return 0
コード例 #36
0
ファイル: on.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Send a 'control/on' request to the pods of each requested cluster.

            One summary line is logged per cluster for which at least one pod
            replied with a HTTP 200.

            :param args: parsed command line (clusters, indices)
            :param proxy: passed to run() together with the query closure
            """
            for token in args.clusters:

                def _query(zk):
                    #
                    # - raw reply count plus the keys of the pods that replied with a HTTP 200
                    #
                    replies = fire(zk, token, 'control/on', subset=args.indices)
                    switched = []
                    for pod, (_, _, code) in replies.items():
                        if code == 200:
                            switched.append(pod)
                    return len(replies), switched

                total, js = run(proxy, _query)
                if not js:
                    continue

                pct = (len(js) * 100) / total
                logger.info('<%s> -> %d%% replies, %d pods on' % (token, pct, len(js)))
コード例 #37
0
ファイル: exec.py プロジェクト: khanchan/ochothon
        def body(self, args, unknown, proxy):
            """Run a shell command on the pods of one cluster and log their output.

            The command line (args.cmdline plus any unparsed tokens) is sent in
            a X-Shell header. Every token that names a local file is read and
            sent along with the request.

            :param args: parsed command line (force, indices, cmdline, clusters, timeout, json)
            :param unknown: extra tokens appended to args.cmdline, or None
            :param proxy: passed to run() together with the query closure
            :return: 0 when 100% of the pods replied with a HTTP 200, 1 otherwise
            :raises AssertionError: if neither args.force nor args.indices is set
            """
            assert args.force or args.indices, 'you must specify --force if -i is not set'

            if unknown is not None:
                args.cmdline += unknown

            headers = {'X-Shell': ' '.join(args.cmdline)}

            #
            # - any token matching a local file gets its content attached to the request
            #
            files = {}
            for token in args.cmdline:
                if not path.isfile(token):
                    continue
                with open(token, 'rb') as f:
                    files[token] = f.read()

            def _query(zk):
                #
                # - raw reply count plus the payload of each pod that replied with a HTTP 200
                #
                replies = fire(zk, args.clusters[0], 'exec', subset=args.indices, headers=headers, files=files, timeout=args.timeout)
                outputs = {}
                for key, (_, out, code) in replies.items():
                    if code == 200:
                        outputs[key] = out
                return len(replies), outputs

            total, js = run(proxy, _query)
            pct = ((len(js) * 100) / total) if total else 0
            if args.json:
                logger.info(json.dumps(js))

            elif js:
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters[0], pct, len(js)))
                for key, log in js.items():
                    if log['stdout']:
                        suffix = '\n\n %s\n' % '\n '.join(log['stdout'])
                    else:
                        suffix = ''
                    logger.info('- %s (exit code %d)%s' % (key, log['code'], suffix))

            if pct == 100:
                return 0
            return 1
コード例 #38
0
ファイル: nodes.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Log how the pods are spread across the nodes.

            Fires an 'info' request at every pod ('*'), counts the HTTP 200
            replies per hints['node'] and logs each node's share of the total
            reply count as a percentage.

            :param args: parsed command line (not read here)
            :param proxy: passed to run() together with the query closure
            """
            def _query(zk):
                #
                # - raw reply count plus the node of each pod that replied with a HTTP 200
                #
                replies = fire(zk, '*', 'info')
                nodes = []
                for _, (_, hints, code) in replies.items():
                    if code == 200:
                        nodes.append(hints['node'])
                return len(replies), nodes

            total, js = run(proxy, _query)
            if not js:
                return

            #
            # - count the pods per node
            #
            rollup = {}
            for node in js:
                rollup[node] = rollup.get(node, 0) + 1

            pct = (100 * len(js)) / total
            logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))

            rows = [['node', '|', 'load'], ['', '|', '']]
            for key, n in sorted(rollup.items()):
                rows.append([key, '|', '%d%%' % ((100 * n) / total)])

            widths = [max(len(cell) for cell in column) for column in zip(*rows)]
            for row in rows:
                logger.info('  '.join(cell.ljust(width) for cell, width in zip(row, widths)))
コード例 #39
0
ファイル: on.py プロジェクト: autodesk-cloud/ochothon
    def run(self):
        """Switch the pods of self.cluster on and record the outcome in self.out.

        Fires a 'control/on' request restricted to self.indices. On success
        self.out['on'] holds the reply keys of the pods that answered with a
        HTTP 200 and self.out['ok'] is set to True. Any failure is logged at
        debug level and swallowed, in which case self.out is left untouched.
        """
        try:

            def _query(zk):
                #
                # - raw reply count plus the keys of the pods that replied with a HTTP 200
                #
                replies = fire(zk, self.cluster, 'control/on', subset=self.indices, timeout=self.timeout)
                return len(replies), [seq for seq, (_, _, code) in replies.items() if code == 200]

            total, js = run(self.proxy, _query)

            #
            # - every pod we reached must have acknowledged the request
            # - the message used to read 'failed to stop', which contradicted the
            #   'failed to switch on' log line it ends up in
            #
            assert len(js) == total, '1 or more pod failed to switch on'

            self.out['on'] = js
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to switch on -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to switch on -> %s' % (self.cluster, diagnostic(failure)))
コード例 #40
0
ファイル: ping.py プロジェクト: a3linux/ochothon
        def body(self, args, proxy):
            """Send the content of a YAML file as a 'control/signal' to the pods.

            The file args.yaml[0] is parsed once and its JSON serialization is
            sent to each cluster in args.clusters. The HTTP 200 replies are
            merged and logged, either as JSON (args.json) or as a summary.

            :param args: parsed command line (yaml, clusters, json)
            :param proxy: passed to run() together with the query closure
            :raises AssertionError: if the YAML file cannot be parsed
            """
            try:
                with open(args.yaml[0], 'r') as f:
                    # NOTE(review): yaml.load() without an explicit Loader can construct
                    # arbitrary objects - consider yaml.safe_load() if the file is not trusted
                    payload = yaml.load(f)

                total = 0
                merged = {}
                for token in args.clusters:

                    def _query(zk):
                        #
                        # - raw reply count plus the payload of each pod that replied with a HTTP 200
                        #
                        replies = fire(zk,
                                       token,
                                       'control/signal',
                                       js=json.dumps(payload))
                        return len(replies), {
                            key: data
                            for key, (_, data, code) in replies.items()
                            if code == 200
                        }

                    pods, js = run(proxy, _query)
                    merged.update(js)
                    total += pods

                pct = (len(merged) * 100) / total if total else 0
                logger.info(
                    json.dumps(merged) if args.
                    json else '%d%% replies, pinged %d pods' %
                    (pct, len(merged)))

            except IOError:

                logger.info('unable to load %s' % args.yaml[0])

            except YAMLError as failure:

                #
                # - report the file name (args.yaml is a list) and never swallow the error
                # - the position is only available on marked errors and may be None
                #
                mark = getattr(failure, 'problem_mark', None)
                if mark is not None:
                    assert 0, '%s is invalid (line %s, column %s)' % (
                        args.yaml[0], mark.line + 1, mark.column + 1)

                assert 0, '%s is invalid' % args.yaml[0]
コード例 #41
0
ファイル: port.py プロジェクト: lmok/ochothon
        def body(self, args, proxy):
            """Log, per cluster, the pods exposing a given port and what it maps to.

            Fires an 'info' request at each cluster in args.clusters and prints
            one table row (pod, pod IP, public IP, remapped port) for every pod
            that replied with a HTTP 200 and lists the port in hints['ports'].

            :param args: parsed command line (port, clusters)
            :param proxy: passed to run() together with the query closure
            """
            port = str(args.port[0])
            for cluster in args.clusters:

                def _query(zk):
                    #
                    # - raw reply count plus one pre-formatted row per matching pod
                    #
                    replies = fire(zk, cluster, 'info')
                    return len(replies), [[key, '|', hints['ip'], '|', hints['public'], '|', str(hints['ports'][port])] for key, (_, hints, code) in sorted(replies.items()) if code == 200 and port in hints['ports']]

                total, js = run(proxy, _query)
                if js:

                    #
                    # - justify & format the whole thing in a nice set of columns
                    # - the separator row must be as wide as the header: zip() stops at the
                    #   shortest row, so a 5 cell separator silently dropped the TCP column
                    #
                    pct = (len(js) * 100) / total
                    logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (cluster, pct, len(js)))
                    rows = [['pod', '|', 'pod IP', '|', 'public IP', '|', 'TCP'], ['', '|', '', '|', '', '|', '']] + js
                    widths = [max(map(len, col)) for col in zip(*rows)]
                    for row in rows:
                        logger.info('  '.join((val.ljust(width) for val, width in zip(row, widths))))
コード例 #42
0
ファイル: info.py プロジェクト: autodesk-cloud/ochonetes
        def body(self, args, proxy):
            """Log the hints reported by the pods of each requested cluster.

            Fires an 'info' request at each cluster in args.clusters and dumps
            the hints of every pod that replied with a HTTP 200 as indented
            JSON, sorted by pod key.

            :param args: parsed command line (clusters)
            :param proxy: passed to run() together with the query closure
            """
            for token in args.clusters:

                def _query(zk):
                    #
                    # - raw reply count plus the hints of each pod that replied with a HTTP 200
                    #
                    replies = fire(zk, token, 'info')
                    found = {}
                    for key, (_, hints, code) in replies.items():
                        if code == 200:
                            found[key] = hints
                    return len(replies), found

                total, js = run(proxy, _query)
                if not total:
                    logger.info('\n<%s> -> no pods found' % token)
                    continue

                #
                # - one pretty-printed json block per pod
                #
                pct = (len(js) * 100) / total
                blocks = []
                for k in sorted(js):
                    dumped = json.dumps(js[k], indent=4, separators=(',', ': '))
                    blocks.append('%s\n%s\n' % (k, dumped))

                logger.info('\n<%s> -> %d%% replies (%d pods total) ->\n\n- %s' % (token, pct, total, '\n- '.join(blocks)))
コード例 #43
0
        def body(self, args, proxy):
            """Send a 'control/off' request to the pods of each requested cluster.

            One summary line is logged per cluster for which at least one pod
            replied with a HTTP 200.

            :param args: parsed command line (force, indices, clusters)
            :param proxy: passed to run() together with the query closure
            :raises AssertionError: if neither args.force nor args.indices is set
            """
            assert args.force or args.indices, 'you must specify --force if -i is not set'

            for token in args.clusters:

                def _query(zk):
                    #
                    # - raw reply count plus the keys of the pods that replied with a HTTP 200
                    #
                    replies = fire(zk, token, 'control/off', subset=args.indices)
                    switched = []
                    for pod, (_, _, code) in replies.items():
                        if code == 200:
                            switched.append(pod)
                    return len(replies), switched

                total, js = run(proxy, _query)
                if not js:
                    continue

                pct = (len(js) * 100) / total
                logger.info('<%s> -> %d%% replies, %d pods off' % (token, pct, len(js)))
コード例 #44
0
ファイル: ls.py プロジェクト: UncleBarney/ochothon
        def body(self, args, proxy):
            """Report, for each cluster, how many pods exist and how many run.

            Fires an 'info' request at every pod ('*') and groups the HTTP 200
            replies on the first space-separated token of their key. Nothing
            is logged when no pod replied with a HTTP 200.

            :param args: parsed command line, only args.json is read here
            :param proxy: passed to run() together with the query closure
            """
            def _query(zk):
                #
                # - raw reply count plus the hints of each pod that replied with a HTTP 200
                #
                replies = fire(zk, '*', 'info')
                valid = {}
                for key, (_, hints, code) in replies.items():
                    if code == 200:
                        valid[key] = hints
                return len(replies), valid

            total, js = run(proxy, _query)
            if not js:
                return

            #
            # - roll the pods up by cluster
            # - the last non-empty status seen for a cluster wins
            #
            summary = {}
            for key, hints in js.items():
                name = key.split(' ')[0]
                entry = summary.setdefault(name, {'total': 0, 'running': 0, 'status': ''})
                entry['total'] += 1
                if hints['process'] == 'running':
                    entry['running'] += 1

                if 'status' in hints and hints['status']:
                    entry['status'] = hints['status']

            if args.json:
                logger.info(json.dumps(summary))
                return

            #
            # - justify & format the whole thing in a nice set of columns
            #
            pct = (100 * len(js)) / total
            logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))
            rows = [['cluster', '|', 'ok', '|', 'status'], ['', '|', '', '|', '']]
            for name, entry in sorted(summary.items()):
                ratio = '%d/%d' % (entry['running'], entry['total'])
                rows.append([name, '|', ratio, '|', entry['status']])

            widths = [max(len(cell) for cell in column) for column in zip(*rows)]
            for row in rows:
                logger.info('  '.join(cell.ljust(width) for cell, width in zip(row, widths)))
コード例 #45
0
ファイル: poll.py プロジェクト: autodesk-cloud/ochothon
        def body(self, args, _, proxy):
            """Log the metrics published by the pods of the selected cluster(s).

            Fires an 'info' request and keeps hints['metrics'] for every pod
            that replied with a HTTP 200 and does carry metrics. The result is
            logged as JSON (args.json) or as a table sorted by pod key.

            :param args: parsed command line (clusters, json)
            :param _: ignored
            :param proxy: passed to run() together with the query closure
            :return: always 0
            """
            def _query(zk):
                #
                # - raw reply count plus the metrics of each pod that does publish some
                #
                replies = fire(zk, args.clusters, 'info')
                return len(replies), {key: hints['metrics'] for key, (_, hints, code) in replies.items() if code == 200 and 'metrics' in hints}

            total, js = run(proxy, _query)
            pct = ((len(js) * 100) / total) if total else 0
            if args.json:
                logger.info(json.dumps(js))

            elif js:

                #
                # - justify & format the whole thing in a nice set of columns
                # - items() (not the python 2 only iteritems()) to stay consistent
                #   with the query above and to keep working on python 3
                #
                logger.info('<%s> -> %d%% replies (%d pods total) ->\n' % (args.clusters, pct, len(js)))
                rows = [['pod', '|', 'metrics'], ['', '|', '']] + sorted([[key, '|', json.dumps(val)] for key, val in js.items()])
                widths = [max(map(len, col)) for col in zip(*rows)]
                for row in rows:
                    logger.info('  '.join((val.ljust(width) for val, width in zip(row, widths))))

            return 0
コード例 #46
0
        def body(self, args, proxy):
            """Log the share of pods hosted by each node.

            Fires an 'info' request at every pod ('*'), tallies the HTTP 200
            replies per hints['node'] and logs, for each node, its pod count as
            a percentage of the total reply count.

            :param args: parsed command line (not read here)
            :param proxy: passed to run() together with the query closure
            """
            def _query(zk):
                #
                # - raw reply count plus the node of each pod that replied with a HTTP 200
                #
                replies = fire(zk, '*', 'info')
                hosts = []
                for _, (_, hints, code) in replies.items():
                    if code == 200:
                        hosts.append(hints['node'])
                return len(replies), hosts

            total, js = run(proxy, _query)
            if not js:
                return

            #
            # - tally the pods per node
            #
            tally = {}
            for node in js:
                tally[node] = tally.get(node, 0) + 1

            pct = (100 * len(js)) / total
            logger.info('%d pods, %d%% replies ->\n' % (len(js), pct))

            table = [['node', '|', 'load'], ['', '|', '']]
            for key, n in sorted(tally.items()):
                table.append([key, '|', '%d%%' % ((100 * n) / total)])

            widths = [max(len(cell) for cell in column) for column in zip(*table)]
            for line in table:
                padded = [cell.ljust(width) for cell, width in zip(line, widths)]
                logger.info('  '.join(padded))
コード例 #47
0
        def body(self, args, _, proxy):
            """Log the pod logs fetched from the selected cluster(s).

            Fires a 'log' request (restricted to args.indices). For each pod
            that replied with a HTTP 200 the whole log is printed if args.long
            is set, its last 16 lines otherwise.

            :param args: parsed command line (clusters, indices, long)
            :param _: ignored
            :param proxy: passed to run() together with the query closure
            :return: always 0
            """
            def _query(zk):
                #
                # - raw reply count plus the log of each pod that replied with a HTTP 200
                #
                replies = fire(zk, args.clusters, 'log', subset=args.indices)
                fetched = {}
                for key, (_, log, code) in replies.items():
                    if code == 200:
                        fetched[key] = log
                return len(replies), fetched

            total, js = run(proxy, _query)
            pct = ((len(js) * 100) / total) if total else 0
            if not js:
                return 0

            #
            # - justify & format the whole thing
            #
            sections = []
            for key, log in js.items():
                if args.long:
                    shown = log
                else:
                    shown = log[-16:]
                sections.append('- %s\n\n  %s' % (key, '  '.join(shown)))

            text = '\n'.join(sections)
            logger.info('<%s> -> %d%% replies (%d pods total) ->\n%s' % (args.clusters, pct, len(js), text))
            return 0
コード例 #48
0
ファイル: port.py プロジェクト: UncleBarney/ochothon
        def body(self, args, proxy):
            """Log, per cluster, the pods exposing a given port and what it maps to.

            Fires an 'info' request at each cluster in args.clusters and keeps
            the pods that replied with a HTTP 200 and list the port in
            hints['ports']. The result is logged as one table per cluster or,
            with args.json, as a single JSON dump covering all clusters.

            :param args: parsed command line (port, clusters, json)
            :param proxy: passed to run() together with the query closure
            """
            port = str(args.port[0])
            outs = {}
            for cluster in args.clusters:

                def _query(zk):
                    #
                    # - raw reply count plus one pre-formatted row per matching pod
                    #
                    replies = fire(zk, cluster, "info")
                    matching = []
                    for key, (_, hints, code) in sorted(replies.items()):
                        if code == 200 and port in hints["ports"]:
                            matching.append([key, "|", hints["ip"], "|", hints["public"], "|", str(hints["ports"][port])])
                    return len(replies), matching

                total, js = run(proxy, _query)

                #
                # - remember each row under its pod key for the json output
                #
                for item in js:
                    outs[item[0]] = {"ip": item[2], "public": item[4], "ports": item[6]}

                if not js or args.json:
                    continue

                #
                # - justify & format the whole thing in a nice set of columns
                #
                pct = (len(js) * 100) / total
                logger.info("<%s> -> %d%% replies (%d pods total) ->\n" % (cluster, pct, len(js)))
                header = ["pod", "|", "pod IP", "|", "public IP", "|", "TCP"]
                spacer = ["", "|", "", "|", "", "|", ""]
                rows = [header, spacer] + js
                widths = [max(len(cell) for cell in column) for column in zip(*rows)]
                for row in rows:
                    logger.info("  ".join(cell.ljust(width) for cell, width in zip(row, widths)))

            if args.json:
                logger.info(json.dumps(outs))
コード例 #49
0
    def run(self):
        """Re-deploy the pods of self.cluster with their image tagged self.version.

        The latest marathon configuration of the application backing the
        cluster is fetched, all the pods are killed, the docker image is PUT
        back with the new version suffix and the pods are then awaited. On
        success self.out['up'] holds the sequence counters of the new pods and
        self.out['ok'] is True. Any failure is logged at debug level and
        swallowed, in which case self.out['ok'] is never set.
        """
        try:

            #
            # - we need to pass the framework master IPs around (ugly)
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = \
                {
                    'content-type': 'application/json',
                    'accept': 'application/json'
                }

            #
            # - first peek and see what pods we have
            # - they should all map to one single marathon application (abort if not)
            # - we'll use the application identifier to retrieve the configuration json later on
            #
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [
                    hints['application'] for (_, hints, _) in replies.values()
                ]

            js = run(self.proxy, _query)
            assert len(
                set(js)
            ) == 1, '%s is mapping to 2+ marathon applications' % self.cluster
            app = js[0]

            #
            # - fetch the various versions for our app
            # - we want to get hold of the most recent configuration
            # - this is a GET: the failure message used to wrongly report a delete
            #
            url = 'http://%s/v2/apps/%s/versions' % (master, app)
            reply = get(url, headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'version lookup failed (HTTP %d)' % code
            js = reply.json()

            #
            # - retrieve the latest one (the first entry of the version list is used)
            # - keep the docker container configuration and the # of tasks around
            # - this is a GET as well, same message fix as above
            #
            last = js['versions'][0]
            url = 'http://%s/v2/apps/%s/versions/%s' % (master, app, last)
            reply = get(url, headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'configuration lookup failed (HTTP %d)' % code
            js = reply.json()

            spec = js['container']
            tag = spec['docker']['image']
            capacity = js['instances']

            #
            # - kill all the pods using a POST /control/kill
            # - wait for them to be dead
            #
            @retry(timeout=self.timeout, pause=0)
            def _spin():
                def _query(zk):
                    replies = fire(zk,
                                   self.cluster,
                                   'control/kill',
                                   timeout=self.timeout)
                    return [(code, seq) for seq, _, code in replies.values()]

                #
                # - fire the request one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                js = run(self.proxy, _query)
                gone = sum(1 for code, _ in js if code == 410)
                assert gone == len(js), 'at least one pod is still running'
                return

            _spin()

            #
            # - grab the docker image
            # - just add a :<version> suffix (or replace it) but don't change the image  proper
            # - update the image and PUT the new configuration back
            # - marathon will then kill & re-start all the tasks
            #
            tokens = tag.split(':')
            spec['docker']['image'] = \
                '%s:%s' % (tag, self.version) if len(tokens) < 2 else '%s:%s' % (tokens[0], self.version)
            js = \
                {
                    'container': spec
                }

            url = 'http://%s/v2/apps/%s' % (master, app)
            reply = put(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            logger.debug(reply.text)
            assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

            #
            # - the pods should now be starting
            # - wait for all the pods to be in the 'running' mode (they are 'dead' right now)
            # - 'stopped' pods are accepted as well unless self.strict is set
            # - the sequence counters allocated to our new pods are returned as well
            #
            target = ['running'] if self.strict else ['stopped', 'running']

            @retry(timeout=self.timeout, pause=3, default={})
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    return [(hints['process'], seq)
                            for seq, hints, _ in replies.values()
                            if hints['process'] in target]

                js = run(self.proxy, _query)
                assert len(js) == capacity, 'not all pods running yet'
                return js

            js = _spin()
            up = [seq for _, seq in js]
            assert len(up) == capacity, '1+ pods still not up (%d/%d)' % (
                len(up), capacity)
            self.out['up'] = up
            self.out['ok'] = True

            logger.debug('%s : %d pods updated to version "%s"' %
                         (self.cluster, capacity, self.version))

        except AssertionError as failure:

            logger.debug('%s : failed to bump -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to bump -> %s' %
                         (self.cluster, diagnostic(failure)))
コード例 #50
0
ファイル: scale.py プロジェクト: a3linux/ochothon
    def run(self):
        """Scale the marathon application backing self.cluster up or down.

        The target number of pods is derived from self.factor: '@<n>' sets it
        to n while 'x<n>' multiplies the current count by n. The result is
        truncated to an integer and clipped to a minimum of 1. Scaling up PUTs
        the new instance count and waits for the pods to show up. Scaling down
        kills the selected pods and then deletes their tasks. self.out['delta']
        holds the difference between the target and the current count and
        self.out['ok'] is set to True on success. Any failure is logged at
        debug level and swallowed, in which case self.out['ok'] is never set.
        """
        try:

            #
            # - we need to pass the framework master IPs around (ugly)
            # - one master is picked at random from the comma separated list
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = \
                {
                    'content-type': 'application/json',
                    'accept': 'application/json'
                }

            #
            # - first peek and see what pods we have
            # - each reply yields a (sequence index, application, task) tuple
            #
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [(seq, hints['application'], hints['task']) for (seq, hints, _) in replies.values()]

            #
            # - remap a bit differently and get an ordered list of task identifiers
            # - we'll use that to kill the newest pods
            #
            js = run(self.proxy, _query)
            total = len(js)
            if self.group is not None:

                #
                # - if -g was specified apply the scaling to the underlying marathon application containing that pod
                # - be careful to update the task list and total # of pods
                #
                keys = {seq: key for (seq, key, _) in js}
                assert self.group in keys, '#%d is not a valid pod index' % self.group
                app = keys[self.group]
                tasks = [(seq, task) for (seq, key, task) in sorted(js, key=(lambda _: _[0])) if key == app]
                total = sum(1 for (_, key, _) in js if key == app)

            else:

                #
                # - check and make sure all our pods map to one single marathon application
                # - the tasks are ordered by sequence index
                #
                keys = set([key for (_, key, _) in js])
                assert len(keys) == 1, '%s maps to more than one application, you must specify -g' % self.cluster
                tasks = [(seq, task) for (seq, _, task) in sorted(js, key=(lambda _: _[0]))]
                app = keys.pop()

            #
            # - infer the target # of pods based on the user-defined factor
            # - the first character is the operator, the remainder is the number
            #
            operator = self.factor[0]
            assert operator in ['@', 'x'], 'invalid operator'
            n = float(self.factor[1:])
            target = n if operator == '@' else total * n

            #
            # - clip the target # of pods down to 1
            #
            target = max(1, int(target))
            self.out['delta'] = target - total
            if target > total:

                #
                # - scale the application capacity up
                #
                js = \
                    {
                        'instances': target
                    }

                url = 'http://%s/v2/apps/%s' % (master, app)
                reply = put(url, data=json.dumps(js), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

                #
                # - wait for all our new pods to be there
                # - NOTE(review): the count is taken over the whole cluster, which presumably only
                #   matches the target when the cluster maps to one single application - confirm for -g
                #
                @retry(timeout=self.timeout, pause=3, default={})
                def _spin():
                    def _query(zk):
                        replies = fire(zk, self.cluster, 'info')
                        return [seq for seq, _, _ in replies.values()]

                    js = run(self.proxy, _query)
                    assert len(js) == target, 'not all pods running yet'
                    return js

                _spin()

            elif target < total:

                #
                # - if the fifo switch is on make sure to pick the oldest pods for deletion
                # - the tasks are ordered by sequence index: the head of the list is removed with
                #   fifo on, everything past the first <target> entries otherwise
                #
                tasks = tasks[:total - target] if self.fifo else tasks[target:]

                #
                # - kill all (or part of) the pods using a POST /control/kill
                # - wait for them to be dead
                #
                @retry(timeout=self.timeout, pause=0)
                def _spin():
                    def _query(zk):
                        indices = [seq for (seq, _) in tasks]
                        replies = fire(zk, self.cluster, 'control/kill', subset=indices, timeout=self.timeout)
                        return [(code, seq) for seq, _, code in replies.values()]

                    #
                    # - fire the request one or more pods
                    # - wait for every pod to report back a HTTP 410 (GONE)
                    # - this means the ochopod state-machine is now idling (e.g dead)
                    #
                    js = run(self.proxy, _query)
                    gone = sum(1 for code, _ in js if code == 410)
                    assert gone == len(js), 'at least one pod is still running'
                    return

                _spin()

                #
                # - delete all the underlying tasks at once using POST v2/tasks/delete
                #
                js = \
                    {
                        'ids': [task for (_, task) in tasks]
                    }

                url = 'http://%s/v2/tasks/delete?scale=true' % master
                reply = post(url, data=json.dumps(js), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code

            #
            # - also reached when the target equals the current count (nothing to do)
            #
            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to scale -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to scale -> %s' % (self.cluster, diagnostic(failure)))
コード例 #51
0
    def run(self):
        try:

            #
            # - we need to pass the framework master IPs around (ugly)
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = \
                {
                    'content-type': 'application/json',
                    'accept': 'application/json'
                }

            #
            # - first peek and see what pods we have
            #
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [(seq, hints['application'], hints['task'])
                        for (seq, hints, _) in replies.values()]

            #
            # - remap a bit differently and get an ordered list of task identifiers
            # - we'll use that to kill the newest pods
            #
            js = run(self.proxy, _query)
            total = len(js)
            if self.group is not None:

                #
                # - if -g was specify apply the scaling to the underlying marathon application containing that pod
                # - be careful to update the task list and total # of pods
                #
                keys = {seq: key for (seq, key, _) in js}
                assert self.group in keys, '#%d is not a valid pod index' % self.group
                app = keys[self.group]
                tasks = [(seq, task)
                         for (seq, key,
                              task) in sorted(js, key=(lambda _: _[0]))
                         if key == app]
                total = sum(1 for (_, key, _) in js if key == app)

            else:

                #
                # - check and make sure all our pods map to one single marathon application
                #
                keys = set([key for (_, key, _) in js])
                assert len(
                    keys
                ) == 1, '%s maps to more than one application, you must specify -g' % self.cluster
                tasks = [(seq, task)
                         for (seq, _, task) in sorted(js, key=(lambda _: _[0]))
                         ]
                app = keys.pop()

            #
            # - infer the target # of pods based on the user-defined factor
            #
            operator = self.factor[0]
            assert operator in ['@', 'x'], 'invalid operator'
            n = float(self.factor[1:])
            target = n if operator == '@' else total * n

            #
            # - clip the target # of pods down to 1
            #
            target = max(1, int(target))
            self.out['delta'] = target - total
            if target > total:

                #
                # - scale the application capacity up
                #
                js = \
                    {
                        'instances': target
                    }

                url = 'http://%s/v2/apps/%s' % (master, app)
                reply = put(url, data=json.dumps(js), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

                #
                # - wait for all our new pods to be there
                #
                @retry(timeout=self.timeout, pause=3, default={})
                def _spin():
                    def _query(zk):
                        replies = fire(zk, self.cluster, 'info')
                        return [seq for seq, _, _ in replies.values()]

                    js = run(self.proxy, _query)
                    assert len(js) == target, 'not all pods running yet'
                    return js

                _spin()

            elif target < total:

                #
                # - if the fifo switch is on make sure to pick the oldest pods for deletion
                #
                tasks = tasks[:total - target] if self.fifo else tasks[target:]

                #
                # - kill all (or part of) the pods using a POST /control/kill
                # - wait for them to be dead
                #
                @retry(timeout=self.timeout, pause=0)
                def _spin():
                    def _query(zk):
                        indices = [seq for (seq, _) in tasks]
                        replies = fire(zk,
                                       self.cluster,
                                       'control/kill',
                                       subset=indices,
                                       timeout=self.timeout)
                        return [(code, seq)
                                for seq, _, code in replies.values()]

                    #
                    # - fire the request one or more pods
                    # - wait for every pod to report back a HTTP 410 (GONE)
                    # - this means the ochopod state-machine is now idling (e.g dead)
                    #
                    js = run(self.proxy, _query)
                    gone = sum(1 for code, _ in js if code == 410)
                    assert gone == len(js), 'at least one pod is still running'
                    return

                _spin()

                #
                # - delete all the underlying tasks at once using POST v2/tasks/delete
                #
                js = \
                    {
                        'ids': [task for (_, task) in tasks]
                    }

                url = 'http://%s/v2/tasks/delete?scale=true' % master
                reply = post(url, data=json.dumps(js), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'delete failed (HTTP %d)' % code

            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to scale -> %s' %
                         (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to scale -> %s' %
                         (self.cluster, diagnostic(failure)))
コード例 #52
0
ファイル: kill.py プロジェクト: a3linux/ochothon
    def run(self):
        """Kill the selected pods of self.cluster and remove their marathon tasks.

        The pods picked by self.indices receive a POST /control/kill until every
        one of them answers HTTP 410. Their marathon tasks are then grouped by
        application: an application whose reported task count matches the number
        of killed tasks is deleted as a whole, otherwise only the killed tasks
        are deleted (with scale=true).

        Results are reported through self.out: 'down' holds the sequence indices
        of the dead pods and 'ok' is set to True once every step went through.
        Assertion failures and other exceptions are logged at debug level rather
        than propagated.
        """
        try:

            #
            # - the marathon master endpoints are handed over via the environment
            # - one of them is picked using choice()
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = {'content-type': 'application/json', 'accept': 'application/json'}

            #
            # - POST /control/kill to the pods we target
            # - each reply is flattened into a (HTTP code, sequence index) pair
            #
            def _kill(zk):
                replies = fire(zk, self.cluster, 'control/kill', subset=self.indices, timeout=self.timeout)
                return [(code, seq) for seq, _, code in replies.values()]

            #
            # - keep asking until every pod reports back a HTTP 410 (GONE)
            # - this means the ochopod state-machine is now idling (e.g dead)
            # - NOTE(review): @retry presumably re-invokes on assertion failure until the timeout, confirm
            #
            @retry(timeout=self.timeout, pause=0)
            def _spin():
                outcome = run(self.proxy, _kill)
                assert all(code == 410 for code, _ in outcome), 'at least one pod is still running'
                return [seq for _, seq in outcome]

            down = _spin()
            self.out['down'] = down
            assert down, 'the cluster is either invalid or empty'
            labels = ['#%d' % seq for seq in down]
            logger.debug('%s : %d dead pods -> %s' % (self.cluster, len(down), ', '.join(labels)))

            #
            # - look the same pods up again to find out what marathon application & task back them
            #
            def _peek(zk):
                replies = fire(zk, self.cluster, 'info', subset=self.indices)
                return [(hints['application'], hints['task']) for _, hints, _ in replies.values()]

            pairs = run(self.proxy, _peek)

            #
            # - group the task identifiers by application
            #
            rollup = {name: [] for name in set(name for name, _ in pairs)}
            for name, task in pairs:
                rollup[name].append(task)

            for app, tasks in rollup.items():

                #
                # - ask marathon how many tasks this application currently has
                #
                url = 'http://%s/v2/apps/%s/tasks' % (master, app)
                reply = get(url, headers=headers)
                code = reply.status_code
                logger.debug('%s : -> %s (HTTP %d)' % (self.cluster, url, code))
                assert code == 200, 'task lookup failed (HTTP %d)' % code
                running = reply.json()['tasks']

                if len(tasks) != len(running):

                    #
                    # - only a subset of the application's pods was killed
                    # - delete just those tasks in one go using POST v2/tasks/delete
                    #
                    payload = {'ids': tasks}
                    url = 'http://%s/v2/tasks/delete?scale=true' % master
                    reply = post(url, data=json.dumps(payload), headers=headers)
                    code = reply.status_code
                    logger.debug('-> %s (HTTP %d)' % (url, code))
                    assert code in (200, 201), 'delete failed (HTTP %d)' % code
                    continue

                #
                # - every task of that application was reported as dead
                # - issue a DELETE /v2/apps to remove the whole application
                #
                url = 'http://%s/v2/apps/%s' % (master, app)
                reply = delete(url, headers=headers)
                code = reply.status_code
                logger.debug('%s : -> %s (HTTP %d)' % (self.cluster, url, code))
                assert code in (200, 204), 'application deletion failed (HTTP %d)' % code

            self.out['ok'] = True

        except AssertionError as failure:

            logger.debug('%s : failed to kill -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to kill -> %s' % (self.cluster, diagnostic(failure)))
コード例 #53
0
ファイル: bump.py プロジェクト: autodesk-cloud/ochothon
    def run(self):
        """Switch every pod of self.cluster to the docker image version self.version.

        The method looks up the single marathon application backing the cluster,
        fetches its most recent configuration, kills all the pods, PUTs the
        container specification back with the image re-tagged to self.version
        and finally waits for the expected number of pods to come back.

        Results are reported through self.out: 'up' holds the sequence indices
        of the pods found after the update and 'ok' is set to True on success.
        Assertion failures and other exceptions are logged at debug level rather
        than propagated.
        """
        try:

            #
            # - we need to pass the framework master IPs around (ugly)
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = \
                {
                    'content-type': 'application/json',
                    'accept': 'application/json'
                }

            #
            # - first peek and see what pods we have
            # - they should all map to one single marathon application (abort if not)
            # - we'll use the application identifier to retrieve the configuration json later on
            #
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return [hints['application'] for (_, hints, _) in replies.values()]

            js = run(self.proxy, _query)
            assert len(set(js)) == 1, '%s is mapping to 2+ marathon applications' % self.cluster
            app = js[0]

            #
            # - fetch the various versions for our app
            # - we want to get hold of the most recent configuration
            #
            url = 'http://%s/v2/apps/%s/versions' % (master, app)
            reply = get(url, headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'version lookup failed (HTTP %d)' % code
            js = reply.json()

            #
            # - retrieve the first version listed (assumed to be the latest one)
            # - keep the docker container configuration and the # of tasks around
            #
            last = js['versions'][0]
            url = 'http://%s/v2/apps/%s/versions/%s' % (master, app, last)
            reply = get(url, headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            assert code == 200 or code == 201, 'version lookup failed (HTTP %d)' % code
            js = reply.json()

            spec = js['container']
            tag = spec['docker']['image']
            capacity = js['instances']

            #
            # - kill all the pods using a POST /control/kill
            # - wait for them to be dead
            #
            @retry(timeout=self.timeout, pause=0)
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'control/kill', timeout=self.timeout)
                    return [(code, seq) for seq, _, code in replies.values()]

                #
                # - fire the request one or more pods
                # - wait for every pod to report back a HTTP 410 (GONE)
                # - this means the ochopod state-machine is now idling (e.g dead)
                #
                js = run(self.proxy, _query)
                gone = sum(1 for code, _ in js if code == 410)
                assert gone == len(js), 'at least one pod is still running'
                return

            _spin()

            #
            # - grab the docker image
            # - just add a :<version> suffix (or replace it) but don't change the image proper
            # - only the last path component may carry a tag: a ':' found before the
            #   last '/' belongs to a registry host:port and must be left alone
            # - update the image and PUT the new configuration back
            # - marathon will then kill & re-start all the tasks
            #
            prefix, slash, leaf = tag.rpartition('/')
            spec['docker']['image'] = '%s%s%s:%s' % (prefix, slash, leaf.split(':')[0], self.version)
            js = \
                {
                    'container': spec
                }

            url = 'http://%s/v2/apps/%s' % (master, app)
            reply = put(url, data=json.dumps(js), headers=headers)
            code = reply.status_code
            logger.debug('-> %s (HTTP %d)' % (url, code))
            logger.debug(reply.text)
            assert code == 200 or code == 201, 'update failed (HTTP %d)' % code

            #
            # - the pods should now be starting
            # - wait for all the pods to be in the 'running' mode (they are 'dead' right now)
            # - when self.strict is off a 'stopped' pod is accepted as well
            # - the sequence counters allocated to our new pods are returned as well
            #
            target = ['running'] if self.strict else ['stopped', 'running']
            @retry(timeout=self.timeout, pause=3, default={})
            def _spin():
                def _query(zk):
                    replies = fire(zk, self.cluster, 'info')
                    return [(hints['process'], seq) for seq, hints, _ in replies.values() if hints['process'] in target]

                js = run(self.proxy, _query)
                assert len(js) == capacity, 'not all pods running yet'
                return js

            #
            # - NOTE(review): _spin() presumably hands back the retry default ({}) on timeout, confirm
            # - the assert below turns that case into an explicit failure
            #
            js = _spin()
            up = [seq for _, seq in js]
            assert len(up) == capacity, '1+ pods still not up (%d/%d)' % (len(up), capacity)
            self.out['up'] = up
            self.out['ok'] = True

            logger.debug('%s : %d pods updated to version "%s"' % (self.cluster, capacity, self.version))

        except AssertionError as failure:

            logger.debug('%s : failed to bump -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to bump -> %s' % (self.cluster, diagnostic(failure)))
コード例 #54
0
ファイル: kill.py プロジェクト: trb116/pythonanalyzer
    def run(self):
        """Delete the kubernetes replication controllers and pods backing self.cluster.

        Every pod of the cluster is queried for its hints. The controller named
        by each distinct 'application' hint is deleted first, then each pod
        named by its 'task' hint. On success self.killed is set to the number of
        pods deleted and self.ok to 1. Assertion failures and other exceptions
        are logged at debug level rather than propagated.
        """
        try:

            #
            # - workaround to fetch the master IP and credentials as there does not seem to
            #   be a way to use 10.0.0.2 from within the pod
            #
            assert 'KUBERNETES_MASTER' in os.environ, '$KUBERNETES_MASTER not specified (check your portal pod)'
            assert 'KUBERNETES_USER' in os.environ, '$KUBERNETES_USER not specified (check your portal pod)'
            assert 'KUBERNETES_PWD' in os.environ, '$KUBERNETES_PWD not specified (check your portal pod)'

            master = os.environ['KUBERNETES_MASTER']
            auth = HTTPBasicAuth(os.environ['KUBERNETES_USER'], os.environ['KUBERNETES_PWD'])

            #
            # - return the total # of replies along with the hints of the pods that answered HTTP 200
            #
            def _query(zk):
                replies = fire(zk, self.cluster, 'info')
                return len(replies), {key: hints for key, (_, hints, code) in replies.items() if code == 200}

            #
            # - each pod refers to its controller via the 'application' hint
            # - abort if any pod failed to answer (we would otherwise leave it behind)
            #
            total, js = run(self.proxy, _query)
            assert total == len(js), 'failure to communicate with one or more pods'
            for key in set([hints['application'] for hints in js.values()]):

                #
                # - HTTP DELETE the controller via the master API
                # - NOTE(review): verify=False turns TLS certificate checks off while the
                #   basic-auth credentials are sent, consider pointing verify at the cluster CA
                #
                url = 'https://%s/api/v1beta3/namespaces/default/replicationcontrollers/%s' % (master, key)
                reply = requests.delete(url, auth=auth, verify=False)
                code = reply.status_code
                logger.debug('-> DELETE %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'replication controller deletion failed (HTTP %d)' % code

            #
            # - the 'task' hint is the pod's identifier
            #
            for hints in js.values():

                #
                # - HTTP DELETE the pod via the master API
                #
                url = 'https://%s/api/v1beta3/namespaces/default/pods/%s' % (master, hints['task'])
                reply = requests.delete(url, auth=auth, verify=False)
                code = reply.status_code
                logger.debug('-> DELETE %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'pod deletion failed (HTTP %d)' % code

            self.killed = len(js)
            self.ok = 1

        except AssertionError as failure:

            logger.debug('%s : failed to kill -> %s' % (self.cluster, failure))

        except Exception as failure:

            logger.debug('%s : failed to kill -> %s' % (self.cluster, diagnostic(failure)))