Beispiel #1
0
        def configure(self, _):
            """
            Resolve the marathon master endpoints and return the command used
            to spawn the portal webserver together with its environment.
            """

            #
            # - try a mesos-dns lookup first (dig master.mesos)
            # - this only works if mesos-dns has been setup and is running
            #
            _, lines = shell("dig master.mesos +short")
            if not lines:

                #
                # - no mesos-dns running: $MARATHON_MASTER must be defined (legacy behavior)
                #
                assert "MARATHON_MASTER" in os.environ, "failed to look mesos-dns up and no $MARATHON_MASTER defined"
                masters = os.environ["MARATHON_MASTER"]

            else:

                #
                # - one marathon endpoint (TCP 8080) per master IP returned by dig
                #
                logger.debug("retrieved %d ips via dig" % len(lines))
                masters = ",".join("%s:8080" % ip for ip in lines)

            #
            # - run the webserver, passing the secret token down as an environment variable
            #
            return "python portal.py", {"token": token, "MARATHON_MASTER": masters}
Beispiel #2
0
 def _k8s(token):
     """
     Query the kubernetes read-only API for the given resource token and
     return the decoded JSON payload.

     Requires $KUBERNETES_USER, $KUBERNETES_PWD and $KUBERNETES_MASTER to
     be set in the env mapping.
     """
     code, lines = shell(
         'curl -f -u %s:%s -k https://%s/api/v1beta3/namespaces/default/%s'
         % (env['KUBERNETES_USER'], env['KUBERNETES_PWD'],
            env['KUBERNETES_MASTER'], token))

     #
     # - fixed: use == rather than "is" (int identity only worked thanks to
     #   CPython's small-integer caching and warns on python 3.8+)
     #
     assert code == 0, 'unable to look the RO service up (is the master running ?)'
     return json.loads(''.join(lines))
Beispiel #3
0
        def configure(self, _):
            """
            Figure out where the marathon masters live and return the command
            spawning the portal webserver plus its environment variables.
            """

            #
            # - attempt to resolve master.mesos via mesos-dns first
            # - this only works when mesos-dns has been setup and is running
            #
            _, lines = shell('dig master.mesos +short')
            if not lines:

                #
                # - mesos-dns not running: fall back on the legacy $MARATHON_MASTER variable
                #
                assert 'MARATHON_MASTER' in os.environ, 'failed to look mesos-dns up and no $MARATHON_MASTER defined'
                masters = os.environ['MARATHON_MASTER']

            else:

                #
                # - one marathon endpoint (TCP 8080) per master IP
                #
                logger.debug('retrieved %d ips via dig' % len(lines))
                masters = ','.join('%s:8080' % ip for ip in lines)

            #
            # - run the webserver with the secret token passed as an environment variable
            #
            variables = {
                'token': token,
                'MARATHON_MASTER': masters
            }
            return 'python portal.py', variables
        def probe(self, cluster):
            """
            Issue a MNTR command against every pod to make sure they are all
            part of the zookeeper ensemble, then report leader statistics.
            """

            #
            # - query each pod over nc (the local pod is reached via localhost:2181)
            #
            leader = None
            for key, pod in cluster.pods.items():

                if key == cluster.key:
                    ip, port = 'localhost', '2181'
                else:
                    ip, port = pod['ip'], pod['ports']['2181']

                code, lines = shell('echo mntr | nc -w 5 %s %s' % (ip, port))
                assert code == 0, 'failed to connect to pod #%d (is it dead ?)' % pod['seq']

                #
                # - parse the tab-separated MNTR output into a dict
                #
                props = {
                    cols[0]: ' '.join(cols[1:])
                    for cols in (row.split('\t') for row in lines if row)
                }

                assert 'zk_server_state' in props, 'pod #%d -> not serving requests (is zk down ?)' % pod['seq']

                #
                # - each pod must report as either the leader or a follower
                #
                state = props['zk_server_state']
                assert state in ['leader', 'follower'], 'pod #%d -> <%s>' % (pod['seq'], state)
                if state == 'leader':
                    leader = props

            #
            # - exactly one leader expected, with every other node synced to it
            #
            assert leader, 'no leader found ?'
            assert int(leader['zk_synced_followers']) == cluster.size - 1, '1+ follower not synced'
            return '%s zk nodes / ~ %s KB' % (leader['zk_znode_count'], leader['zk_approximate_data_size'])
Beispiel #5
0
        def body(self, args, cwd):
            """
            Sign the uploaded TGZ archive and POST it back to our own /run
            endpoint, relaying the JSON response to the CLI.
            """

            #
            # - request a JSON formatted output
            # - map any -v command line variable to a X-Var-* header
            #
            headers = {"Accept": "application/json"}
            for item in (args.variables or []):
                chunks = item.split(":")
                headers["X-Var-%s" % chunks[0]] = chunks[1]

            #
            # - locate the uploaded TGZ archive in our temp. directory
            # - compute its SHA1 HMAC via openssl
            # - pass it down as the X-Signature header
            #
            tgz = join(cwd, args.tgz[0])
            code, lines = shell('openssl dgst -sha1 -hmac "%s" %s' % (token, tgz))
            assert code == 0, "failed to sign the archive"
            digest = lines[0].split(" ")[1]
            headers["X-Signature"] = "sha1=%s" % digest

            #
            # - POST the archive to ourselves (/run)
            # - relay status and log back to the CLI
            #
            with open(tgz, "rb") as f:
                url = "http://localhost:5000/run/%s" % "+".join(args.scripts)
                reply = requests.post(url, files={"tgz": f.read()}, headers=headers)
                assert reply.status_code < 300, "invalid response (HTTP %d)" % reply.status_code
                js = json.loads(reply.text)
                return 0 if js["ok"] else 1, js["log"]
Beispiel #6
0
    def body(self, args, cwd):
        """
        Execute the snippet verbatim from the temporary directory; any file
        uploaded in the process is available from there as well.
        """
        outcome = shell(args, cwd=cwd)
        return outcome
Beispiel #7
0
        def _remote(cmdline):
            """
            Forward the command line to the ochothon portal (POST /shell) and
            return its decoded JSON response.

            Any token on the command line that maps to a local file is
            uploaded as a multipart attachment.
            """

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            #
            now = time.time()
            tokens = cmdline.split(' ')
            files = [
                '-F %s=@%s' % (basename(token), expanduser(token))
                for token in tokens if isfile(expanduser(token))
            ]
            line = ' '.join([
                basename(token) if isfile(expanduser(token)) else token
                for token in tokens
            ])
            logger.debug('"%s" -> %s' % (line, portal))
            snippet = 'curl -X POST -H "X-Shell:%s" %s %s/shell' % (
                line, ' '.join(files), portal)
            code, lines = shell(snippet)

            #
            # - fixed: "is 0" -> "== 0" (int identity only worked thanks to
            #   CPython's small-integer caching and warns on python 3.8+)
            #
            assert code == 0, 'i/o failure (is the proxy portal down ?)'
            js = json.loads(lines[0])
            elapsed = time.time() - now
            logger.debug('<- %s (took %.2f seconds) ->\n\t%s' %
                         (portal, elapsed, '\n\t'.join(js['out'].split('\n'))))
            return js
Beispiel #8
0
        def body(self, args, _):
            """
            Trivial echo: the arguments are echoed back to the caller as-is
            (no command line parsing involved).
            """
            snippet = 'echo "your command was %s"' % args
            return shell(snippet)
Beispiel #9
0
        def body(self, args, _):
            """
            Simply exec an echo of the incoming arguments; the output is
            passed back to the caller verbatim.
            """
            cmd = 'echo "your command was %s"' % args
            return shell(cmd)
 def rs_status():
     """
     Executes rs.status() against the localhost mongod and returns the
     decoded JSON document.
     """
     logger.debug("getting replicaset status")
     snippet = "echo 'JSON.stringify(rs.status())' | mongo localhost:27018 --quiet"
     code, lines = shell(snippet)
     assert code == 0, 'failed to connect to local pod (is it dead ?)'
     return json.loads(' '.join(lines))
 def rs_add(pod):
     """
     Add the pod as a replicaset member using rs.add({_id: pod['seq'], host: pod['ip']:pod['ports']['27018']})
     """
     member = json.dumps({
         '_id': pod['seq'],
         'host': "%s:%d" % (pod['ip'], pod['ports']['27018'])
     })
     code, _ = shell("echo 'rs.add(%s)' | mongo localhost:27018 --quiet" % member)
     assert code == 0, 'Unable to do rs.add(%s)' % member
Beispiel #12
0
                def _2():
                    """
                    Same as above except for slightly older DCOS releases:
                    $MESOS_MASTER is located in /opt/mesosphere/etc/mesos-slave.
                    """
                    logger.debug('checking /opt/mesosphere/etc/mesos-slave...')
                    _, lines = shell("grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave")

                    #
                    # - skip the first 18 characters (presumably the MESOS_MASTER=zk://
                    #   prefix) and keep everything up to the first '/'
                    #
                    grepped = lines[0]
                    return grepped[18:].split('/')[0]
Beispiel #13
0
                def _3():
                    """
                    A regular package install will write the slave settings
                    under /etc/mesos/zk (the snippet in there looks like
                    zk://10.0.0.56:2181/mesos).
                    """
                    logger.debug('checking /etc/mesos/zk...')
                    _, lines = shell("cat /etc/mesos/zk")

                    #
                    # - drop the 5-character zk:// prefix and keep the host:port part
                    #
                    snippet = lines[0]
                    return snippet[5:].split('/')[0]
Beispiel #14
0
                def _3():
                    """
                    A regular package install will write the slave settings
                    under /etc/mesos/zk (the snippet in there looks like
                    zk://10.0.0.56:2181/mesos) -> return it verbatim.
                    """
                    logger.debug('checking /etc/mesos/zk...')
                    _, lines = shell("cat /etc/mesos/zk")
                    connection = lines[0]
                    return connection
Beispiel #15
0
                def _install_from_package():
                    """
                    A regular package install will write the slave settings
                    under /etc/mesos/zk - the snippet in there looks like
                    zk://10.0.0.56:2181/mesos.
                    """
                    code, lines = shell("cat /etc/mesos/zk")

                    #
                    # - fixed: "is 0" -> "== 0" (int identity only worked thanks
                    #   to CPython's small-integer caching and warns on 3.8+)
                    #
                    assert code == 0 and lines[0], 'unable to retrieve the zk connection string'

                    #
                    # - drop the 5-character zk:// prefix and keep the host:port part
                    #
                    return lines[0][5:].split('/')[0]
Beispiel #16
0
                def _1():
                    """
                    Most recent DCOS release: $MESOS_MASTER is located in
                    /opt/mesosphere/etc/mesos-slave-common and the snippet in
                    there is prefixed by MESOS_ZK=zk://<ip:port>/mesos.
                    """
                    logger.debug('checking /opt/mesosphere/etc/mesos-slave-common...')
                    _, lines = shell("grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave-common")

                    #
                    # - skip the first 18 characters (the key=zk:// prefix) and
                    #   keep the host:port part only
                    #
                    raw = lines[0]
                    return raw[18:].split('/')[0]
Beispiel #17
0
                def _install_from_package():
                    """
                    A regular package install will write the slave settings
                    under /etc/mesos/zk (this is mesos-slave /etc/mesos which
                    contains the zookeeper config).
                    """
                    code, lines = shell("cat /etc/mesos/zk")

                    #
                    # - fixed: "is 0" -> "== 0" (int identity only worked thanks
                    #   to CPython's small-integer caching and warns on 3.8+)
                    #
                    assert code == 0 and lines[0], 'unable to retrieve the zk connection string'

                    #
                    # - drop the 5-character zk:// prefix and keep the host:port part
                    #
                    return lines[0][5:].split('/')[0]
Beispiel #18
0
                def _2():
                    """
                    Same as above except for slightly older DCOS releases:
                    $MESOS_MASTER is located in /opt/mesosphere/etc/mesos-slave.
                    """
                    logger.debug('checking /opt/mesosphere/etc/mesos-slave...')
                    _, lines = shell(
                        "grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave")

                    #
                    # - strip the first 13 characters (the MESOS_MASTER= prefix)
                    #   and return the rest
                    #
                    grepped = lines[0]
                    return grepped[13:]
Beispiel #19
0
                def _dcos_deployment():
                    """
                    A DCOS slave is setup slightly differently with the
                    settings being environment variables set in
                    /opt/mesosphere/etc/mesos-slave - the snippet in there
                    looks like MESOS_MASTER=zk://leader.mesos:2181/mesos.
                    """
                    code, lines = shell("grep MASTER /opt/mesosphere/etc/mesos-slave")

                    #
                    # - fixed: "is 0" -> "== 0" (int identity only worked thanks
                    #   to CPython's small-integer caching and warns on 3.8+)
                    #
                    assert code == 0 and lines[0], 'unable to retrieve the zk connection string'

                    #
                    # - skip the 18-character MESOS_MASTER=zk:// prefix and keep host:port
                    #
                    return lines[0][18:].split('/')[0]
Beispiel #20
0
                def _1():
                    """
                    Most recent DCOS release: $MESOS_MASTER is located in
                    /opt/mesosphere/etc/mesos-slave-common, prefixed by
                    MESOS_MASTER=zk://<ip:port>/mesos.
                    """
                    logger.debug(
                        'checking /opt/mesosphere/etc/mesos-slave-common...')
                    _, lines = shell(
                        "grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave-common"
                    )

                    #
                    # - strip the 13-character MESOS_MASTER= prefix and return the rest
                    #
                    grepped = lines[0]
                    return grepped[13:]
Beispiel #21
0
 def get_pid(self, hints, hints_ignore=None):
     """
     Return the list of PIDs whose ps -ef entry matches every hint (and none
     of the ignore hints), or None when nothing matches or the lookup fails.

     "grep" itself is always added to the ignore list so the pipeline does
     not match its own grep processes.
     """
     assert hints, "need at least one hint"

     #
     # - fixed: copy the caller's list instead of appending to it (the
     #   original mutated the hints_ignore argument across calls)
     #
     ignored = list(hints_ignore) if hints_ignore else []
     ignored.append("grep")
     try:
         _, lines = shell("ps -ef | grep -v %s | grep %s | awk '{print $2}'" % (" | grep -v ".join(ignored), " | grep ".join(hints)))
         return [int(pid) for pid in lines] if lines else None
     except Exception:

         #
         # - best-effort lookup: swallow failures and report "not found"
         #
         return None
Beispiel #22
0
def servo(strict=True, verbose=False):
    """
    Generator meant to be used as a context manager: reads the portal
    coordinates (ip + secret token) from /opt/servo/.portal, yields a
    _proxy() callable that relays signed command lines to the portal, then
    terminates the process (exit code 0 on success, 1 on any failure).

    NOTE(review): python 2 syntax (print statements, str() key for hmac) --
    this block will not compile under python 3 as-is.
    """
    try:

        #
        # - retrieve the portal coordinates from /opt/servo/.portal
        # - this file is rendered by the pod script upon boot as a little JSON blob
        # - what we want is the proxy ip/port plus its secret token
        #
        _, lines = shell('cat .portal', cwd='/opt/servo')
        hints = json.loads(lines[0].decode('utf-8'))
        assert hints, '/opt/servo/.portal not found (pod not yet configured ?)'

        def _proxy(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - don't forget to add the SHA1 signature
            # - in debug mode the verbatim response from the portal is dumped on stdout
            # - slight modification : we force the json output (-j)
            #
            tokens = cmdline.split(' ') + ['-j']
            files = ['-F %s=@%s' % (basename(token), expanduser(token)) for token in tokens if isfile(expanduser(token))]
            line = ' '.join([basename(token) if isfile(expanduser(token)) else token for token in tokens])
            # NOTE(review): the conditional binds over the whole expression, so
            # digest is '' (not 'sha1=') when no token is set -- confirm intended
            digest = 'sha1=' + hmac.new(str(hints['token']), line, hashlib.sha1).hexdigest() if hints['token'] else ''
            snippet = 'curl -X POST -H "X-Shell:%s" -H "X-Signature:%s" %s %s/shell' % (line, digest, ' '.join(files), hints['ip'])
            code, lines = shell(snippet)
            # NOTE(review): "code is 0" relies on CPython small-int caching; == would be safer
            assert code is 0, 'is the portal @ %s down ?' % hints['ip']
            js = json.loads(lines[0])
            ok = js['ok']
            if verbose:
                print '[%s] "%s"' % ('passed' if ok else 'failed', cmdline)
            # in strict mode any failed command aborts the whole run
            assert not strict or ok, '"%s" failed' % cmdline
            return json.loads(js['out']) if ok else None

        yield _proxy

        #
        # - all clear, return 0 to signal a success
        #
        sys.exit(0)

    except AssertionError as failure:

        print 'failure -> %s' % failure

    except Exception as failure:

        print 'unexpected failure -> %s' % diagnostic(failure)

    sys.exit(1)
Beispiel #23
0
def servo(strict=True, verbose=False):
    """
    Generator meant to be used as a context manager: reads the portal
    location from /opt/servo/.portal, yields a _proxy() callable that
    relays command lines to the portal, then terminates the process
    (exit code 0 on success, 1 on any failure).

    NOTE(review): python 2 syntax (print statements) -- this block will not
    compile under python 3 as-is.
    """
    try:

        #
        # - retrieve the portal coordinates from /opt/servo/.portal
        # - this file is rendered by the pod script upon boot
        #
        _, lines = shell('cat .portal', cwd='/opt/servo')
        portal = lines[0]
        assert portal, '/opt/servo/.portal not found (pod not yet configured ?)'

        def _proxy(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            # - slight modification : we force the json output (-j)
            #
            tokens = cmdline.split(' ') + ['-j']
            files = ['-F %s=@%s' % (basename(token), expanduser(token)) for token in tokens if isfile(expanduser(token))]
            line = ' '.join([basename(token) if isfile(expanduser(token)) else token for token in tokens])
            snippet = 'curl -X POST -H "X-Shell:%s" %s %s/shell' % (line, ' '.join(files), portal)
            code, lines = shell(snippet)
            # NOTE(review): "code is 0" relies on CPython small-int caching; == would be safer
            assert code is 0, 'is the portal @ %s down ?' % portal
            js = json.loads(lines[0])
            ok = js['ok']
            if verbose:
                print '[%s] "%s"' % ('passed' if ok else 'failed', cmdline)
            # in strict mode any failed command aborts the whole run
            assert not strict or ok, '"%s" failed' % cmdline
            return json.loads(js['out']) if ok else None

        yield _proxy

        #
        # - all clear, return 0 to signal a success
        #
        sys.exit(0)

    except AssertionError as failure:

        print 'failure -> %s' % failure

    except Exception as failure:

        print 'unexpected failure -> %s' % diagnostic(failure)

    sys.exit(1)
 def rs_initiate(pods):
     """
     Executes rs.initiate(...) against the local mongod. The _id of members is pod['seq'] and
     host is pod['ip']:pod['ports']['27018']
     """
     rs_name = os.getenv('REPLSET_NAME', 'rs0')

     #
     # - build the replicaset configuration document (one member per pod)
     #
     members = [
         {'_id': pod['seq'], 'host': "%s:%d" % (pod['ip'], pod['ports']['27018'])}
         for pod in pods
     ]
     rs_config_doc = {'_id': rs_name, 'members': members}

     #
     # - run rs.initiate() with that document via the mongo shell
     #
     jsonstr = json.dumps(rs_config_doc)
     logger.info("initializing replicaset %s", rs_config_doc)
     code, _ = shell("echo 'rs.initiate(%s)' | mongo localhost:27018 --quiet" % jsonstr)
     assert code == 0, 'Unable to do rs.initiate(%s)' % rs_config_doc
Beispiel #25
0
        def _remote(cmdline):
            """
            Forward the command line to the ochothon portal (POST /shell) and
            return its decoded JSON response; local files referenced on the
            command line are uploaded as multipart attachments.
            """

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            #
            now = time.time()
            tokens = cmdline.split(' ')
            files = ['-F %s=@%s' % (basename(token), expanduser(token)) for token in tokens if isfile(expanduser(token))]
            line = ' '.join([basename(token) if isfile(expanduser(token)) else token for token in tokens])
            logger.debug('"%s" -> %s' % (line, portal))
            snippet = 'curl -X POST -H "X-Shell:%s" %s %s/shell' % (line, ' '.join(files), portal)
            code, lines = shell(snippet)

            #
            # - fixed: "is 0" -> "== 0" (int identity only worked thanks to
            #   CPython's small-integer caching and warns on python 3.8+)
            #
            assert code == 0, 'i/o failure (is the proxy portal down ?)'
            js = json.loads(lines[0])
            elapsed = time.time() - now
            logger.debug('<- %s (took %.2f seconds) ->\n\t%s' % (portal, elapsed, '\n\t'.join(js['out'].split('\n'))))
            return js
Beispiel #26
0
        def _proxy(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            # - slight modification : we force the json output (-j)
            #
            tokens = cmdline.split(' ') + ['-j']
            files = ['-F %s=@%s' % (basename(token), expanduser(token)) for token in tokens if isfile(expanduser(token))]
            line = ' '.join([basename(token) if isfile(expanduser(token)) else token for token in tokens])
            snippet = 'curl -X POST -H "X-Shell:%s" %s %s/shell' % (line, ' '.join(files), portal)
            code, lines = shell(snippet)
            assert code is 0, 'is the portal @ %s down ?' % portal
            js = json.loads(lines[0])
            ok = js['ok']
            if verbose:
                print '[%s] "%s"' % ('passed' if ok else 'failed', cmdline)
            assert not strict or ok, '"%s" failed' % cmdline
            return json.loads(js['out']) if ok else None
Beispiel #27
0
        def _proxy(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - don't forget to add the SHA1 signature
            # - in debug mode the verbatim response from the portal is dumped on stdout
            # - slight modification : we force the json output (-j)
            #
            tokens = cmdline.split(' ') + ['-j']
            files = ['-F %s=@%s' % (basename(token), expanduser(token)) for token in tokens if isfile(expanduser(token))]
            line = ' '.join([basename(token) if isfile(expanduser(token)) else token for token in tokens])
            digest = 'sha1=' + hmac.new(str(hints['token']), line, hashlib.sha1).hexdigest() if hints['token'] else ''
            snippet = 'curl -X POST -H "X-Shell:%s" -H "X-Signature:%s" %s %s/shell' % (line, digest, ' '.join(files), hints['ip'])
            code, lines = shell(snippet)
            assert code is 0, 'is the portal @ %s down ?' % hints['ip']
            js = json.loads(lines[0])
            ok = js['ok']
            if verbose:
                print '[%s] "%s"' % ('passed' if ok else 'failed', cmdline)
            assert not strict or ok, '"%s" failed' % cmdline
            return json.loads(js['out']) if ok else None
Beispiel #28
0
        def body(self, args, cwd):
            """
            Sign the uploaded TGZ archive, POST it to our own /run endpoint
            and relay the JSON response back to the CLI.
            """

            #
            # - ask for a JSON formatted response
            # - map each -v command line variable to a X-Var-* header
            #
            headers = {'Accept': 'application/json'}
            for item in (args.variables or []):
                chunks = item.split(':')
                headers['X-Var-%s' % chunks[0]] = chunks[1]

            #
            # - locate the uploaded TGZ archive in our temp. directory
            # - compute its SHA1 HMAC via openssl
            # - pass it down as the X-Signature header
            #
            tgz = join(cwd, args.tgz[0])
            code, lines = shell('openssl dgst -sha1 -hmac "%s" %s' %
                                (token, tgz))
            assert code == 0, 'failed to sign the archive'
            signature = lines[0].split(' ')[1]
            headers['X-Signature'] = 'sha1=%s' % signature

            #
            # - POST the archive to ourselves (/run)
            # - relay the status code and log back to the CLI
            #
            with open(tgz, 'rb') as f:
                url = 'http://localhost:5000/run/%s' % '+'.join(args.scripts)
                reply = requests.post(url,
                                      files={'tgz': f.read()},
                                      headers=headers)
                assert reply.status_code < 300, 'invalid response (HTTP %d)' % reply.status_code
                js = json.loads(reply.text)
                return 0 if js['ok'] else 1, js['log']
Beispiel #29
0
        def sanity_check(self, pid):
            """
            Cheap sub-process up-time measurement: restart the clock whenever
            the tracked process ID changes, then report the uptime together
            with the build tag.
            """

            #
            # - reset our time origin whenever the pid changes
            #
            now = time.time()
            if pid != self.pid:
                self.pid = pid
                self.since = now

            hours = (now - self.since) / 3600.0

            #
            # - the BUILD file is added during integration prior to building
            #   the docker image -> include its content in the metrics
            #
            _, lines = shell('cat BUILD', cwd=self.cwd)
            return {
                'build': lines[0],
                'uptime': '%.2f hours (pid %s)' % (hours, pid)
            }
Beispiel #30
0
        def initialize(self):
            """
            Set the splunk forwarder up: render props.conf from its template,
            start splunk, register the forward-servers and monitor the
            watcher log file.
            """
            splunk = cfg['splunk']

            #
            # - render props.conf with the configured sourcetype
            #
            loader = FileSystemLoader('/opt/watcher/templates')
            template = Environment(loader=loader).get_template('props.conf')
            target = '/opt/splunkforwarder/etc/system/local/props.conf'
            with open(target, 'wb') as f:
                f.write(template.render({'sourcetype': splunk['sourcetype']}))

            #
            # - start splunk and set the admin password
            #
            shell('splunk start --accept-license && splunk edit user admin -password foo -auth admin:changeme')

            #
            # - register each configured forward-server
            #
            for url in splunk['forward'].split(','):
                shell('splunk add forward-server %s' % url)

            #
            # - monitor the watcher log file
            #
            shell('touch /var/log/watcher.log && splunk add monitor /var/log/watcher.log -index service -sourcetype %s' % splunk['sourcetype'])
Beispiel #31
0
        def initialize(self):
            """
            Configure and start the splunk forwarder: render props.conf,
            register every forward-server and monitor /var/log/watcher.log.
            """
            splunk = cfg['splunk']

            #
            # - render props.conf with the configured sourcetype
            #
            env = Environment(loader=FileSystemLoader('/opt/watcher/templates'))
            template = env.get_template('props.conf')
            target = '/opt/splunkforwarder/etc/system/local/props.conf'
            with open(target, 'wb') as f:
                f.write(template.render({'sourcetype': splunk['sourcetype']}))

            #
            # - start splunk and set the admin password
            #
            shell('splunk start --accept-license && splunk edit user admin -password foo -auth admin:changeme')

            #
            # - register each configured forward-server
            #
            for url in splunk['forward'].split(','):
                shell('splunk add forward-server %s' % url)

            #
            # - monitor the watcher log file
            #
            shell('touch /var/log/watcher.log && splunk add monitor /var/log/watcher.log -index service -sourcetype %s' % splunk['sourcetype'])
Beispiel #32
0
        def _from_curl(scripts):
            """
            Handler running one or more uploaded scripts: verifies the HMAC
            signature, unpacks the TGZ archive, decrypts any .aes file, runs
            each script in order and returns the aggregated log, either as
            text/plain or JSON depending on the Accept header.
            """

            #
            # - retrieve the X-Signature header
            # - fast-fail on a HTTP 403 if not there or if there is a mismatch
            #
            if not "X-Signature" in request.headers:
                return "", 403

            #
            # - force a json output if the Accept header matches 'application/json'
            # - otherwise default to a text/plain response
            # - create a temporary directory to run from
            #
            ok = 0
            log = []
            # NOTE(review): string.letters is python 2 only (string.ascii_letters on 3.x)
            alphabet = string.letters + string.digits
            token = "".join(alphabet[ord(c) % len(alphabet)] for c in os.urandom(8))
            raw = request.accept_mimetypes.best_match(["application/json"]) is None
            tmp = tempfile.mkdtemp()
            try:

                #
                # - any request header in the form X-Var-* will be kept around and passed as
                #   an environment variable when executing the script
                # - make sure the variable is spelled in uppercase
                #
                local = {key[6:].upper(): value for key, value in request.headers.items() if key.startswith("X-Var-")}

                #
                # - craft a unique callback URL that points to this pod
                # - this will be passed down to the script to enable transient testing jobs
                #
                cwd = path.join(tmp, "uploaded")
                local["CALLBACK"] = "http://%s/callback/%s" % (env["local"], token)
                blocked[token] = cwd
                for key, value in local.items():
                    log += ["$%s = %s" % (key, value)]

                #
                # - download the archive
                # - compute the HMAC and compare (use our pod token as the key)
                # - fail on a 403 if mismatch
                #
                where = path.join(tmp, "bundle.tgz")
                request.files["tgz"].save(where)
                with open(where, "rb") as f:
                    # NOTE(review): shadows the builtin 'bytes'
                    bytes = f.read()
                    digest = "sha1=" + hmac.new(env["token"], bytes, hashlib.sha1).hexdigest()
                    # NOTE(review): plain != is not constant-time; hmac.compare_digest would be safer
                    if digest != request.headers["X-Signature"]:
                        return "", 403

                #
                # - extract it into its own folder
                # - make sure the requested script is there
                #
                code, _ = shell("mkdir uploaded && tar zxf bundle.tgz -C uploaded", cwd=tmp)
                assert code == 0, "unable to open the archive (bogus payload ?)"

                #
                # - decrypt any file whose extension is .aes
                # - just run openssl directly and dump the output in the working directory
                # - note: at this point we just look for .aes file in the top level directory
                #
                for file in os.listdir(cwd):
                    bare, ext = path.splitext(file)
                    if ext != ".aes":
                        continue

                    code, _ = shell(
                        "openssl enc -d -base64 -aes-256-cbc -k %s -in %s -out %s" % (env["token"], file, bare), cwd=cwd
                    )
                    if code == 0:
                        log += ["decrypted %s" % file]

                #
                # - run each script in order
                # - abort immediately if the script exit code is not zero
                # - keep the script output as a json array
                #
                for script in scripts.split("+"):
                    now = time.time()
                    assert path.exists(path.join(cwd, script)), "unable to find %s (check your scripts)" % script
                    code, lines = shell("python %s 2>&1" % script, cwd=cwd, env=local)
                    log += lines + ["%s ran in %d seconds" % (script, int(time.time() - now))]
                    assert code == 0, "%s failed on exit code %d" % (script, code)

                ok = 1

            except AssertionError as failure:

                log += ["failure (%s)" % failure]

            except Exception as failure:

                log += ["unexpected failure (%s)" % diagnostic(failure)]

            finally:

                #
                # - make sure to cleanup our temporary directory
                #
                del blocked[token]
                shutil.rmtree(tmp)

            if raw:

                #
                # - if 'application/json' was not requested simply dump the log as is
                # - force the response code to be HTTP 412 upon failure and HTTP 200 otherwise
                #
                code = 200 if ok else 412
                return "\n".join(log), code, {"Content-Type": "text/plain; charset=utf-8"}

            else:

                #
                # - if 'application/json' was requested always respond with a HTTP 200
                # - the response body then contains our serialized JSON output
                #
                js = {"ok": ok, "log": log}

                return json.dumps(js), 200, {"Content-Type": "application/json; charset=utf-8"}
Beispiel #33
0
        #
        env = environ
        hints = json.loads(env['ochopod'])
        env['OCHOPOD_ZK'] = hints['zk']

        #
        # - Check for passed set of dirty clusters, haproxies, and time period in deployment yaml
        #
        cleaning = env['DIRTY'].split(',') if 'DIRTY' in env else []

        period = float(env['PERIOD']) if 'PERIOD' in env else 60

        #
        # - Get the portal that we found during cluster configuration (see pod/pod.py)
        #
        _, lines = shell('cat /opt/cleaner/.portal')
        portal = lines[0]
        assert portal, '/opt/cleaner/.portal not found (pod not yet configured ?)'
        logger.debug('using proxy @ %s' % portal)

        #
        # - Remote for direct communication with the portal
        #
        def _remote(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            #
            now = time.time()
            tokens = cmdline.split(' ')
Beispiel #34
0
    def run(self):
        """
        Deploy one marathon application from our YAML template: parse and
        validate the container definition, POST it to a randomly picked
        marathon master, wait until the requested number of pods is up and
        optionally phase the previous generation out. The outcome (sequence
        indices and ok flag) is written into self.out; failures are logged
        and swallowed.
        """
        try:

            #
            # - we need to pass the framework master IPs around (ugly)
            #
            assert 'MARATHON_MASTER' in os.environ, '$MARATHON_MASTER not specified (check your portal pod)'
            master = choice(os.environ['MARATHON_MASTER'].split(','))
            headers = \
                {
                    'content-type': 'application/json',
                    'accept': 'application/json'
                }

            with open(self.template, 'r') as f:

                #
                # - parse the template yaml file (e.g container definition)
                # - NOTE(review): yaml.load() without an explicit Loader honors
                #   arbitrary python tags - acceptable for trusted templates only
                #
                raw = yaml.load(f)
                assert raw, 'empty YAML input (user error ?)'

                #
                # - merge with our defaults
                # - we want at least the cluster & image settings
                # - TCP 8080 is added by default to the port list
                #
                defaults = \
                    {
                        'start': True,
                        'debug': False,
                        'settings': {},
                        'ports': [8080],
                        'verbatim': {}
                    }

                cfg = merge(defaults, raw)
                assert 'cluster' in cfg, 'cluster identifier undefined (user error ?)'
                assert 'image' in cfg, 'docker image undefined (user error ?)'

                #
                # - if a suffix is specified append it to the cluster identifier
                #
                if self.suffix:
                    cfg['cluster'] = '%s-%s' % (cfg['cluster'], self.suffix)

                #
                # - timestamp the application (we really want a new uniquely identified application)
                # - lookup the optional overrides and merge with our pod settings if specified
                # - this is what happens when the -o option is used
                #
                stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S')
                qualified = '%s.%s' % (self.namespace, cfg['cluster'])
                application = 'ochopod.%s-%s' % (qualified, stamp)
                if qualified in self.overrides:

                    blk = self.overrides[qualified]
                    logger.debug('%s : overriding %d settings (%s)' % (self.template, len(blk), qualified))
                    cfg['settings'] = merge(cfg['settings'], blk)

                def _nullcheck(cfg, prefix):

                    #
                    # - walk through the settings and flag any null value
                    #
                    missing = []
                    if cfg is not None:
                        for key, value in cfg.items():
                            if value is None:
                                missing += ['%s.%s' % ('.'.join(prefix), key)]
                            elif isinstance(value, dict):
                                missing += _nullcheck(value, prefix + [key])

                    return missing

                missing = _nullcheck(cfg['settings'], ['pod'])
                assert not missing, '%d setting(s) missing ->\n\t - %s' % (len(missing), '\n\t - '.join(missing))

                #
                # - lookup our all the pods for that identifier
                # - get their sequence indices (we'll use it to phase out them out)
                # - if the target # of pods we want is not specified default to 1 unless we are cycling
                # - set it to the current # of pods in that case
                #
                def _query(zk):
                    replies = fire(zk, qualified, 'info')
                    return [(hints['process'], seq) for _, (seq, hints, code) in replies.items() if code == 200]

                prev = run(self.proxy, _query)
                if self.cycle and not self.pods:
                    self.pods = sum(1 if state != 'dead' else 0 for state, _ in prev)

                #
                # - if we still have no target default it to 1 single pod
                #
                if not self.pods:
                    self.pods = 1

                #
                # - setup our port list
                # - the port binding is specified either by an integer (container port -> dynamic mesos port), by
                #   two integers (container port -> host port) or by an integer followed by a * (container port ->
                #   same port on the host)
                # - the marathon pods must by design map /etc/mesos
                #
                def _parse_port(token):
                    if isinstance(token, int):
                        return {'containerPort': token}
                    elif isinstance(token, str) and token.endswith(' *'):
                        port = int(token[:-2])
                        return {'containerPort': port, 'hostPort': port}
                    elif isinstance(token, str):
                        ports = token.split(' ')
                        assert len(ports) == 2, 'invalid port syntax (must be two integers separated by 1+ spaces)'
                        return {'containerPort': int(ports[0]), 'hostPort': int(ports[1])}
                    else:
                        assert 0, 'invalid port syntax ("%s")' % token

                #
                # - note the marathon-ec2 ochopod bindings will set the application hint automatically
                #   via environment variable (e.g no need to specify it here)
                # - make sure to mount /etc/mesos and /opt/mesosphere to account for various mesos installs
                #
                ports = [_parse_port(token) for token in cfg['ports']] if 'ports' in cfg else []
                spec = \
                    {
                        'id': application,
                        'instances': self.pods,
                        'env':
                            {
                                'ochopod_cluster': cfg['cluster'],
                                'ochopod_debug': str(cfg['debug']).lower(),
                                'ochopod_start': str(cfg['start']).lower(),
                                'ochopod_namespace': self.namespace,
                                'pod': json.dumps(cfg['settings'])
                            },
                        'container':
                            {
                                'type': 'DOCKER',
                                'docker':
                                    {
                                        'forcePullImage': True,
                                        'image': cfg['image'],
                                        'network': 'BRIDGE',
                                        'portMappings': ports
                                    },
                                'volumes':
                                    [
                                        {
                                            'containerPath': '/etc/mesos',
                                            'hostPath': '/etc/mesos',
                                            'mode': 'RO'
                                        },
                                        {
                                            'containerPath': '/opt/mesosphere',
                                            'hostPath': '/opt/mesosphere',
                                            'mode': 'RO'
                                        }
                                    ]
                            }
                    }

                #
                # - if we have a 'verbatim' block in our image definition yaml, merge it now
                #
                if 'verbatim' in cfg:
                    spec = merge(cfg['verbatim'], spec)

                #
                # - pick a marathon master at random
                # - fire the POST /v2/apps to create our application
                # - this will indirectly spawn our pods
                #
                url = 'http://%s/v2/apps' % master
                reply = post(url, data=json.dumps(spec), headers=headers)
                code = reply.status_code
                logger.debug('-> %s (HTTP %d)' % (url, code))
                assert code == 200 or code == 201, 'submission failed (HTTP %d)' % code

                #
                # - wait for all the pods to be in the 'running' mode
                # - the 'application' hint is set by design to the marathon application identifier
                # - the sequence counters allocated to our new pods are returned as well
                #
                target = ['dead', 'running'] if self.strict else ['dead', 'stopped', 'running']
                @retry(timeout=self.timeout, pause=3, default={})
                def _spin():
                    def _query(zk):
                        replies = fire(zk, qualified, 'info')
                        return [(hints['process'], seq) for seq, hints, _ in replies.values()
                                if hints['application'] == application and hints['process'] in target]

                    js = run(self.proxy, _query)
                    assert len(js) == self.pods, 'not all pods running yet'
                    return js

                js = _spin()

                #
                # - compare with != (the original 'is not' only worked thanks to
                #   CPython string interning and matches the != used above)
                #
                running = sum(1 for state, _ in js if state != 'dead')
                up = [seq for _, seq in js]
                self.out['up'] = up
                self.out['ok'] = self.pods == running
                logger.debug('%s : %d/%d pods are running ' % (self.template, running, self.pods))

                if not up:

                    #
                    # - nothing is running (typically because the image has an issue and is not
                    #   not booting the ochopod script for instance, which happens often)
                    # - in that case fire a HTTP DELETE against the marathon application to clean it up
                    #
                    url = 'http://%s/v2/apps/%s' % (master, application)
                    reply = delete(url, headers=headers)
                    code = reply.status_code
                    logger.debug('-> %s (HTTP %d)' % (url, code))
                    assert code == 200 or code == 204, 'application deletion failed (HTTP %d)' % code

                elif self.cycle:

                    #
                    # - phase out & clean-up the pods that were previously running
                    # - simply exec() the kill tool for this
                    #
                    time.sleep(self.cycle)
                    down = [seq for _, seq in prev]
                    code, _ = shell('toolset kill %s -i %s -d' % (qualified, ' '.join(['%d' % seq for seq in down])))
                    assert code == 0, 'failed to phase out %d pods' % len(prev)
                    self.out['down'] = down

        except AssertionError as failure:

            logger.debug('%s : failed to deploy -> %s' % (self.template, failure))

        except YAMLError as failure:

            if hasattr(failure, 'problem_mark'):
                mark = failure.problem_mark
                logger.debug('%s : invalid deploy.yml (line %s, column %s)' % (self.template, mark.line+1, mark.column+1))

        except Exception as failure:

            logger.debug('%s : failed to deploy -> %s' % (self.template, diagnostic(failure)))
Beispiel #35
0
        # - pass down the ZK ensemble coordinate
        #
        env = environ
        hints = json.loads(env['ochopod'])
        env['OCHOPOD_ZK'] = hints['zk']

        #
        # - Check for passed set of clusters to be watched in deployment yaml
        #
        watching = env['DAYCARE'].split(',') if 'DAYCARE' in env else ['*']
        period = float(env['PERIOD']) if 'PERIOD' in env else 60

        #
        # - Get the portal that we found during cluster configuration (see pod/pod.py)
        #
        _, lines = shell('cat /opt/watcher/.portal')
        portal = lines[0]
        assert portal, '/opt/watcher/.portal not found (pod not yet configured ?)'
        logger.debug('using proxy @ %s' % portal)

        #
        # - Prepare message logging
        #
        from logging import INFO, Formatter
        from logging.config import fileConfig
        from logging.handlers import RotatingFileHandler
        #
        # - the location on disk used for logging watcher messages
        #
        message_file = '/var/log/watcher.log'
Beispiel #36
0
        hints = json.loads(env['ochopod'])
        env['OCHOPOD_ZK'] = hints['zk']

        #
        # - Check for passed set of scalee clusters, haproxies, and time period in deployment yaml
        #
        scalees = env['SCALEES'].split(',') if 'SCALEES' in env else []

        haproxies = env['HAPROXIES'].split(',') if 'HAPROXIES' in env else []

        period = float(env['PERIOD']) if 'PERIOD' in env else 60

        #
        # - Get the portal that we found during cluster configuration (see pod/pod.py)
        #
        _, lines = shell('cat /opt/scaler/.portal')
        portal = lines[0]
        assert portal, '/opt/scaler/.portal not found (pod not yet configured ?)'
        logger.debug('using proxy @ %s' % portal)

        #
        # - Remote for direct communication with the portal
        #
        def _remote(cmdline):

            #
            # - this block is taken from cli.py in ochothon
            # - in debug mode the verbatim response from the portal is dumped on stdout
            #
            now = time.time()
            tokens = cmdline.split(' ')
Beispiel #37
0
        # - pass down the ZK ensemble coordinate
        #
        env = environ
        hints = json.loads(env['ochopod'])
        env['OCHOPOD_ZK'] = hints['zk']

        #
        # - Check for passed set of clusters to be watched in deployment yaml
        #
        watching = env['DAYCARE'].split(',') if 'DAYCARE' in env else ['*'] 
        period = float(env['PERIOD']) if 'PERIOD' in env else 60

        #
        # - Get the portal that we found during cluster configuration (see pod/pod.py)
        #
        _, lines = shell('cat /opt/watcher/.portal')
        portal = lines[0]
        assert portal, '/opt/watcher/.portal not found (pod not yet configured ?)'
        logger.debug('using proxy @ %s' % portal)
        
        #
        # - Prepare message logging
        #
        from logging import INFO, Formatter
        from logging.config import fileConfig
        from logging.handlers import RotatingFileHandler
        #
        # - the location on disk used for logging watcher messages
        #
        message_file = '/var/log/watcher.log'
Beispiel #38
0
        def body(self, args):

            #
            # - build & push one docker image per requested tag via the docker client
            # - setup a temp directory
            # - use it to store a tar of the current folder
            #
            started = time.time()
            tmp = tempfile.mkdtemp()
            try:

                #
                # - tar the whole thing
                # - authenticate against the registry once (the credentials do not
                #   change between tags, no need to re-login on every iteration)
                # - loop over the specified image tags
                #
                code, _ = shell('tar zcf %s/bundle.tgz *' % tmp)
                assert code == 0, 'failed to tar'
                auth = docker.login('autodeskcloud', '/host/.docker/config.json')
                for tag in args.tags.split(','):

                    image = '%s:%s' % (args.repo[0], tag)

                    #
                    # - send the archive over to the underlying docker daemon
                    # - make sure to remove the intermediate containers
                    # - the raw build output is not needed, only the status flag
                    #
                    tick = time.time()
                    built, _ = docker.build(path='%s/bundle.tgz' % tmp, pull=True, forcerm=True, tag=image)
                    assert built, 'empty docker output (failed to build or docker error ?)'
                    logger.debug('built image %s in %d seconds' % (image, time.time() - tick))

                    #
                    # - push the image using the specified tag
                    #
                    tick = time.time()
                    docker.push(image)
                    logger.debug('pushed image %s to %s in %d seconds' % (image, auth['serveraddress'], time.time() - tick))

                    #
                    # - remove the image we just built if not latest
                    # - this is done to avoid keeping around too many tagged images
                    #
                    if tag != 'latest':
                        docker.remove_image(image, force=True)

                #
                # - clean up and remove any untagged image
                # - this is important otherwise the number of images will slowly creep up
                #
                images = docker.images(quiet=True, all=True)
                victims = [item['Id'] for item in images if item['RepoTags'] == ['<none>:<none>']]
                for victim in victims:
                    logger.debug('removing untagged image %s' % victim)
                    docker.remove_image(victim, force=True)

            finally:

                #
                # - make sure to cleanup our temporary directory
                #
                shutil.rmtree(tmp)

            lapse = int(time.time() - started)
            logger.info('%s built and pushed in %d seconds' % (args.repo[0], lapse))
            return 0
Beispiel #39
0
                                logger.info('wiped out %s' % cached)
                            except IOError:
                                pass

                        repo = path.join(cached, cfg['name'])
                        if not path.exists(repo):

                            #
                            # - the repo is not in our cache
                            # - git clone it
                            #
                            os.makedirs(cached)
                            logger.info('cloning %s [%s]' % (tag, branch))
                            url = 'https://%s' % cfg['git_url'][6:]
                            code, _ = shell(
                                'git clone -b %s --single-branch %s' %
                                (branch, url),
                                cwd=cached)
                            assert code == 0, 'unable to clone %s' % url
                        else:

                            #
                            # - the repo is already in there
                            # - git pull
                            #
                            shell('git pull', cwd=repo)

                        #
                        # - checkout the specified commit hash
                        #
                        logger.info('checkout @ %s' % sha[0:10])
                        code, _ = shell('git checkout %s' % sha, cwd=repo)
Beispiel #40
0
        def body(self, args):

            #
            # - build & push one docker image per requested tag, talking to the
            #   local docker daemon REST endpoint over curl (socat on TCP 9001)
            # - setup a temp directory
            # - use it to store a tar of the current folder
            #
            stated = time.time()
            tmp = tempfile.mkdtemp()
            try:

                #
                # - tar the whole thing
                # - loop over the specified image tags
                #
                code, _ = shell("tar zcf %s/bundle.tgz *" % tmp)
                assert code == 0, "failed to tar"
                for tag in args.tags.split(","):

                    #
                    # - send the archive over to the underlying docker daemon
                    # - make sure to remove the intermediate containers
                    # - by design our container runs a socat on TCP 9001
                    # - NOTE(review): the \& inside an already double-quoted URL reaches
                    #   curl as a literal backslash in POSIX shells - confirm the shell()
                    #   helper really requires that escape
                    #
                    tick = time.time()
                    _, lines = shell(
                        'curl -H "Content-Type:application/octet-stream" '
                        "--data-binary @bundle.tgz "
                        '"http://localhost:9001/build?forcerm=1\&t=%s:%s"' % (args.repo[0], tag),
                        cwd=tmp,
                    )
                    assert len(lines) > 1, "empty docker output (failed to build or docker error ?)"
                    last = json.loads(lines[-1])

                    #
                    # - the only way to test out for failure is to peek at the end of the docker output
                    #
                    lapse = time.time() - tick
                    assert "error" not in last, last["error"]
                    logger.debug("built tag %s in %d seconds" % (tag, lapse))

                    #
                    # - cat our .dockercfg (which is mounted)
                    # - craft the authentication header required for the push
                    # - push the image using the specified tag
                    # - NOTE(review): b64decode(...).split(":") and b64encode(json.dumps(...))
                    #   assume python 2 str semantics - this block is not python 3 safe
                    #
                    _, lines = shell("cat /host/.docker/config.json")
                    assert lines, "was docker login run (no config.json found) ?"
                    js = json.loads(" ".join(lines))
                    assert "auths" in js, "invalid config.json setup (unsupported docker install ?)"
                    for url, payload in js["auths"].items():
                        tokens = base64.b64decode(payload["auth"]).split(":")
                        host = urlparse(url).hostname
                        credentials = {
                            "serveraddress": host,
                            "username": tokens[0],
                            "password": tokens[1],
                            "email": payload["email"],
                            "auth": "",
                        }

                        # the registry credentials ride in the X-Registry-Auth header
                        # as base64-encoded json (docker remote API convention)
                        tick = time.time()
                        auth = base64.b64encode(json.dumps(credentials))
                        shell(
                            'curl -X POST -H "X-Registry-Auth:%s" '
                            '"http://localhost:9001/images/%s/push?tag=%s"' % (auth, args.repo[0], tag)
                        )
                        lapse = time.time() - tick
                        logger.debug("pushed tag %s to %s in %d seconds" % (tag, host, lapse))

                    #
                    # - remove the image we just built if not latest
                    # - this is done to avoid keeping around too many tagged images
                    #
                    if tag != "latest":
                        shell('curl -X DELETE "http://localhost:9001/images/%s:%s?force=true"' % (args.repo[0], tag))

                #
                # - clean up and remove any untagged image
                # - this is important otherwise the number of images will slowly creep up
                #
                _, lines = shell('curl "http://localhost:9001/images/json?all=0"')
                js = json.loads(lines[0])
                victims = [item["Id"] for item in js if item["RepoTags"] == ["<none>:<none>"]]
                for victim in victims:
                    logger.debug("removing untagged image %s" % victim)
                    shell('curl -X DELETE "http://localhost:9001/images/%s?force=true"' % victim)

            finally:

                #
                # - make sure to cleanup our temporary directory
                #
                shutil.rmtree(tmp)

            lapse = int(time.time() - stated)
            logger.info("%s built and pushed in %d seconds" % (args.repo[0], lapse))
            return 0
Beispiel #41
0
 def _peek(token):
     # one-shot lookup against the EC2 instance metadata endpoint; return the
     # first output line, or '' when curl failed or timed out (no output)
     out = shell('curl --max-time 1 -f http://169.254.169.254/latest/meta-data/%s' % token)[1]
     if out:
         return out[0]
     return ''
Beispiel #42
0
        def _from_curl(scripts):
            """
            HTTP handler unpacking an uploaded, HMAC-signed tgz archive and
            running one or more python scripts out of it, returning their
            aggregated log either as plain text or JSON depending on the
            Accept header.
            """

            #
            # - retrieve the X-Signature header
            # - fast-fail on a HTTP 403 if not there or if there is a mismatch
            #
            if 'X-Signature' not in request.headers:
                return '', 403

            #
            # - split the last URI token in case multiple scripts are specified
            #

            #
            # - force a json output if the Accept header matches 'application/json'
            # - otherwise default to a text/plain response
            # - create a temporary directory to run from
            #
            ok = 0
            log = []
            alphabet = string.letters + string.digits
            token = ''.join(alphabet[ord(c) % len(alphabet)] for c in os.urandom(8))
            raw = request.accept_mimetypes.best_match(['application/json']) is None
            tmp = tempfile.mkdtemp()
            try:

                #
                # - any request header in the form X-Var-* will be kept around and passed as
                #   an environment variable when executing the script
                # - make sure the variable is spelled in uppercase
                #
                local = {key[6:].upper(): value for key, value in request.headers.items() if key.startswith('X-Var-')}

                #
                # - craft a unique callback URL that points to this pod
                # - this will be passed down to the script to enable transient testing jobs
                #
                cwd = path.join(tmp, 'uploaded')
                local['CALLBACK'] = 'http://%s/callback/%s' % (env['local'], token)
                blocked[token] = cwd
                for key, value in local.items():
                    log += ['$%s = %s' % (key, value)]

                #
                # - download the archive
                # - compute the HMAC and compare (use our pod token as the key)
                # - fail on a 403 if mismatch
                #
                where = path.join(tmp, 'bundle.tgz')
                request.files['tgz'].save(where)
                with open(where, 'rb') as f:
                    payload = f.read()
                    digest = 'sha1=' + hmac.new(env['token'], payload, hashlib.sha1).hexdigest()
                    if digest != request.headers['X-Signature']:
                        return '', 403

                #
                # - extract it into its own folder
                # - make sure the requested script is there
                #
                code, _ = shell('mkdir uploaded && tar zxf bundle.tgz -C uploaded', cwd=tmp)
                assert code == 0, 'unable to open the archive (bogus payload ?)'

                #
                # - decrypt any file whose extension is .aes
                # - just run openssl directly and dump the output in the working directory
                # - note: at this point we just look for .aes file in the top level directory
                #
                for file in os.listdir(cwd):
                    bare, ext = path.splitext(file)
                    if ext != '.aes':
                        continue

                    code, _ = shell('openssl enc -d -base64 -aes-256-cbc -k %s -in %s -out %s' % (env['token'], file, bare), cwd=cwd)
                    if code == 0:
                        log += ['decrypted %s' % file]

                #
                # - run each script in order
                # - abort immediately if the script exit code is not zero
                # - keep the script output as a json array
                #
                for script in scripts.split('+'):
                    now = time.time()
                    assert path.exists(path.join(cwd, script)), 'unable to find %s (check your scripts)' % script
                    code, lines = shell('python %s 2>&1' % script, cwd=cwd, env=local)
                    log += lines + ['%s ran in %d seconds' % (script, int(time.time() - now))]
                    assert code == 0, '%s failed on exit code %d' % (script, code)

                ok = 1

            except AssertionError as failure:

                log += ['failure (%s)' % failure]

            except Exception as failure:

                log += ['unexpected failure (%s)' % diagnostic(failure)]

            finally:

                #
                # - make sure to cleanup our temporary directory
                # - pop() rather than del so an early failure (before the token was
                #   added to the blocked map) does not mask the original error with
                #   a KeyError
                #
                blocked.pop(token, None)
                shutil.rmtree(tmp)

            if raw:

                #
                # - if 'application/json' was not requested simply dump the log as is
                # - force the response code to be HTTP 412 upon failure and HTTP 200 otherwise
                #
                code = 200 if ok else 412
                return '\n'.join(log), code, \
                    {
                        'Content-Type': 'text/plain; charset=utf-8'
                    }

            else:

                #
                # - if 'application/json' was requested always respond with a HTTP 200
                # - the response body then contains our serialized JSON output
                #
                js = \
                    {
                        'ok': ok,
                        'log': log
                    }

                return json.dumps(js), 200, \
                    {
                        'Content-Type': 'application/json; charset=utf-8'
                    }
Beispiel #43
0
                                shutil.rmtree(tmp)
                                logger.info('wiped out %s' % tmp)
                            except IOError:
                                pass

                        repo = path.join(tmp, cfg['name'])
                        if not path.exists(repo):

                            #
                            # - the repo is not in our cache
                            # - git clone it
                            #
                            os.makedirs(tmp)
                            logger.info('cloning %s' % tag)
                            url = 'https://%s' % cfg['git_url'][6:]
                            code, _ = shell('git clone -b master --single-branch %s' % url, cwd=tmp)
                            assert code == 0, 'unable to clone %s' % url
                        else:

                            #
                            # - the repo is already in there
                            # - git pull
                            #
                            shell('git pull', cwd=repo)

                        #
                        # - checkout the specified commit hash
                        #
                        logger.info('checkout @ %s' % sha[0:10])
                        code, _ = shell('git checkout %s' % sha, cwd=repo)
                        assert code == 0, 'unable to checkout %s (wrong credentials and/or git issue ?)' % sha[0:10]
Beispiel #44
0
                                shutil.rmtree(cached)
                                logger.info('wiped out %s' % cached)
                            except IOError:
                                pass

                        repo = path.join(cached, cfg['name'])
                        if not path.exists(repo):

                            #
                            # - the repo is not in our cache
                            # - git clone it
                            #
                            os.makedirs(cached)
                            logger.info('cloning %s [%s]' % (tag, branch))
                            url = 'https://%s' % cfg['git_url'][6:]
                            code, _ = shell('git clone -b %s --single-branch %s' % (branch, url), cwd=cached)
                            assert code == 0, 'unable to clone %s' % url
                        else:

                            #
                            # - the repo is already in there
                            # - git pull
                            #
                            shell('git pull', cwd=repo)

                        #
                        # - checkout the specified commit hash
                        #
                        logger.info('checkout @ %s' % sha[0:10])
                        code, _ = shell('git checkout %s' % sha, cwd=repo)
                        assert code == 0, 'unable to checkout %s (wrong credentials and/or git issue ?)' % sha[0:10]
Beispiel #45
0
        def body(self, args):
            """
            Tar the current folder, build one docker image per requested tag,
            push each image to the registry and prune any untagged leftovers.
            Always returns 0; failures surface as assertion errors.
            """

            #
            # - start the clock and allocate a scratch directory
            # - the tarball of the current folder will live there
            #
            t0 = time.time()
            scratch = tempfile.mkdtemp()
            try:

                #
                # - archive the whole working directory in one shot
                # - then iterate over each tag requested on the command line
                #
                status, _ = shell('tar zcf %s/bundle.tgz *' % scratch)
                assert status == 0, 'failed to tar'
                for tag in args.tags.split(','):

                    image = '%s:%s' % (args.repo[0], tag)

                    #
                    # - hand the archive over to the underlying docker daemon
                    # - intermediate containers are force-removed
                    #
                    mark = time.time()
                    ok, _ = docker.build(path='%s/bundle.tgz' % scratch,
                                         pull=True,
                                         forcerm=True,
                                         tag=image)
                    assert ok, 'empty docker output (failed to build or docker error ?)'
                    logger.debug('built image %s in %d seconds' %
                                 (image, time.time() - mark))

                    #
                    # - authenticate against the registry using the mounted config.json
                    # - then push the freshly built image under its tag
                    #
                    auth = docker.login('autodeskcloud',
                                        '/host/.docker/config.json')

                    mark = time.time()
                    docker.push(image)
                    logger.debug(
                        'pushed image %s to %s in %d seconds' %
                        (image, auth['serveraddress'], time.time() - mark))

                    #
                    # - drop any non-latest image right after pushing
                    # - keeps the local image count from creeping up tag after tag
                    #
                    if tag != 'latest':
                        docker.remove_image(image, force=True)

                #
                # - sweep the daemon for dangling (untagged) images
                # - delete each one, otherwise they slowly accumulate
                #
                listed = docker.images(quiet=True, all=True)
                dangling = [
                    item['Id'] for item in listed
                    if item['RepoTags'] == ['<none>:<none>']
                ]
                for victim in dangling:
                    logger.debug('removing untagged image %s' % victim)
                    docker.remove_image(victim, force=True)

            finally:

                #
                # - the scratch directory is always wiped, success or not
                #
                shutil.rmtree(scratch)

            logger.info('%s built and pushed in %d seconds' %
                        (args.repo[0], int(time.time() - t0)))
            return 0
Beispiel #46
0
 def _peek(token, strict=True):
     """
     Fetch one EC2 instance metadata value via the 169.254.169.254 endpoint.

     :param token: metadata key to look up (e.g 'local-ipv4')
     :param strict: when true, assert the curl invocation succeeded
     :rtype: str (first line of the curl output)
     """
     code, lines = shell('curl -f http://169.254.169.254/latest/meta-data/%s' % token)
     # - compare the exit code with == (the original used 'is 0', an identity check
     #   that only happens to work thanks to CPython small-int caching)
     assert not strict or code == 0, 'unable to lookup EC2 metadata for %s (are you running on EC2 ?)' % token
     return lines[0]
Beispiel #47
0
        def body(self, args):
            """
            Build and push one docker image per requested tag by driving the local
            docker daemon over HTTP (a socat relay is expected on localhost:9001).
            Untagged leftover images are pruned afterwards. Always returns 0;
            failures surface as assertion errors.
            """

            #
            # - setup a temp directory
            # - use it to store a tar of the current folder
            #
            stated = time.time()
            tmp = tempfile.mkdtemp()
            try:

                #
                # - tar the whole thing
                # - loop over the specified image tags
                #
                code, _ = shell('tar zcf %s/bundle.tgz *' % tmp)
                assert code == 0, 'failed to tar'
                for tag in args.tags.split(','):

                    #
                    # - send the archive over to the underlying docker daemon
                    # - make sure to remove the intermediate containers
                    # - by design our container runs a socat on TCP 9001
                    # - NOTE(review): '\&' keeps a literal backslash in the query string
                    #   since the URL is already double-quoted for the shell — presumably
                    #   harmless, but worth confirming against the daemon's parsing
                    #
                    tick = time.time()
                    _, lines = shell(
                        'curl -H "Content-Type:application/octet-stream" '
                        '--data-binary @bundle.tgz '
                        '"http://localhost:9001/build?forcerm=1\&t=%s:%s"' %
                        (args.repo[0], tag),
                        cwd=tmp)
                    assert len(
                        lines
                    ) > 1, 'empty docker output (failed to build or docker error ?)'
                    # - the daemon streams json objects line by line; only the final
                    #   one tells us whether the build succeeded
                    last = json.loads(lines[-1])

                    #
                    # - the only way to test out for failure is to peek at the end of the docker output
                    #
                    lapse = time.time() - tick
                    assert 'error' not in last, last['error']
                    logger.debug('built tag %s in %d seconds' % (tag, lapse))

                    #
                    # - cat our .dockercfg (which is mounted)
                    # - craft the authentication header required for the push
                    # - push the image using the specified tag
                    #
                    _, lines = shell('cat /host/.docker/config.json')
                    assert lines, 'was docker login run (no config.json found) ?'
                    js = json.loads(' '.join(lines))
                    assert 'auths' in js, 'invalid config.json setup (unsupported docker install ?)'
                    # - each config.json entry holds a base64 'user:password' blob;
                    #   rebuild the X-Registry-Auth payload the registry API expects
                    for url, payload in js['auths'].items():
                        tokens = base64.b64decode(payload['auth']).split(':')
                        host = urlparse(url).hostname
                        credentials = \
                            {
                                'serveraddress': host,
                                'username': tokens[0],
                                'password': tokens[1],
                                'email': payload['email'],
                                'auth': ''
                            }

                        tick = time.time()
                        auth = base64.b64encode(json.dumps(credentials))
                        shell('curl -X POST -H "X-Registry-Auth:%s" '
                              '"http://localhost:9001/images/%s/push?tag=%s"' %
                              (auth, args.repo[0], tag))
                        lapse = time.time() - tick
                        logger.debug('pushed tag %s to %s in %d seconds' %
                                     (tag, host, lapse))

                    #
                    # - remove the image we just built if not latest
                    # - this is done to avoid keeping around too many tagged images
                    #
                    if tag != 'latest':
                        shell(
                            'curl -X DELETE "http://localhost:9001/images/%s:%s?force=true"'
                            % (args.repo[0], tag))

                #
                # - clean up and remove any untagged image
                # - this is important otherwise the number of images will slowly creep up
                #
                _, lines = shell(
                    'curl "http://localhost:9001/images/json?all=0"')
                js = json.loads(lines[0])
                victims = [
                    item['Id'] for item in js
                    if item['RepoTags'] == ['<none>:<none>']
                ]
                for victim in victims:
                    logger.debug('removing untagged image %s' % victim)
                    shell(
                        'curl -X DELETE "http://localhost:9001/images/%s?force=true"'
                        % victim)

            finally:

                #
                # - make sure to cleanup our temporary directory
                #
                shutil.rmtree(tmp)

            lapse = int(time.time() - stated)
            logger.info('%s built and pushed in %d seconds' %
                        (args.repo[0], lapse))
            return 0
Beispiel #48
0
# module-level logger shared by the pod script below
logger = logging.getLogger('ochopod')

if __name__ == '__main__':

    #
    # - generate a random 32 characters token (valid for the lifetime of the pod)
    # - use it to implement a SHA1 digest verification
    # - this token can also be defined when deploying the pod
    # - NOTE(review): string.letters is Python 2 only (string.ascii_letters on
    #   Python 3), and indexing with ord(c) % len(alphabet) introduces a slight
    #   modulo bias (65 symbols vs 256 byte values) — acceptable for this use,
    #   but the 'secrets' module would avoid both issues on Python 3
    #
    settings = json.loads(os.environ['pod'])
    alphabet = string.letters + string.digits + '+/'
    randomized = ''.join(alphabet[ord(c) % len(alphabet)]
                         for c in os.urandom(32))
    token = settings['token'] if 'token' in settings else randomized
    # - reset the jenkins account password to the token via chpasswd
    # - NOTE(review): the token is interpolated into a shell pipeline — safe only
    #   because the alphabet excludes quotes and whitespace; confirm if the
    #   deploy-time 'token' setting can contain arbitrary characters
    shell("echo 'jenkins:%s' | sudo -S chpasswd" % token)

    class Run(Tool):
        """
        Dedicated tool to upload/trigger CD scripts from the ochothon CLI. The tool will perform the SHA1
        signature and allow to specify arbitrary variables on the command line. The servo output will passed back
        to the CLI.

        CLI usage:
        $ exec *.servo --force run my-scripts-folder script.py --variables key:value
        """

        tag = 'run'

        def define_cmdline_parsing(self, parser):
Beispiel #49
0
 def _aws(token):
     """
     Fetch one EC2 instance metadata value via the 169.254.169.254 endpoint.

     :param token: metadata key to look up (e.g 'instance-id')
     :rtype: str (first line of the curl output)
     """
     code, lines = shell(
         'curl -f http://169.254.169.254/latest/meta-data/%s' %
         token)
     # - compare the exit code with == (the original used 'is 0', an identity check
     #   that only happens to work thanks to CPython small-int caching)
     assert code == 0, 'unable to lookup EC2 metadata for %s (are you running on EC2 ?)' % token
     return lines[0]
Beispiel #50
0
 def _peek(snippet):
     """Run *snippet* through shell() and return its first output line, or '' when there is none."""
     _, captured = shell(snippet)
     if not captured:
         return ''
     return captured[0]
Beispiel #51
0
 def _peek(snippet):
     """Execute *snippet* via shell() and hand back the first line of output ('' if empty)."""
     _, out = shell(snippet)
     return out[0] if out else ''
Beispiel #52
0
    def boot(self, lifecycle, model=Reactive, local=0):

        #
        # - quick check to make sure we get the right implementations
        #
        assert issubclass(model,
                          Model), 'model must derive from ochopod.api.Model'
        assert issubclass(
            lifecycle,
            LifeCycle), 'lifecycle must derive from ochopod.api.LifeCycle'

        #
        # - start logging to /var/log/ochopod.log
        #
        logger.info('EC2 marathon bindings started')
        web = Flask(__name__)

        #
        # - default presets in case we run outside of marathon (local vm testing)
        # - any environment variable prefixed with "ochopod." is of interest for us (e.g this is what the user puts
        #   in the marathon application configuration for instance)
        # - the other settings come from marathon (namely the port bindings & application/task identifiers)
        # - the MESOS_TASK_ID is important to keep around to enable task deletion via the marathon REST API
        #
        env = \
            {
                'ochopod_cluster': '',
                'ochopod_debug': 'false',
                'ochopod_local': 'false',
                'ochopod_namespace': 'marathon',
                'ochopod_port': '8080',
                'MESOS_TASK_ID': '',
                'MARATHON_APP_ID': '/local',
                'PORT_8080': '8080'
            }

        env.update(os.environ)
        ochopod.enable_cli_log(debug=env['ochopod_debug'] == 'true')
        try:

            #
            # - grab our environment variables (which are set by the marathon executor)
            # - extract the port bindings and construct a small remapping dict
            #
            ports = {}
            logger.debug(
                'environment ->\n%s' %
                '\n'.join(['\t%s -> %s' % (k, v) for k, v in env.items()]))
            for key, val in env.items():
                if key.startswith('PORT_'):
                    ports[key[5:]] = int(val)

            #
            # - keep any "ochopod_" environment variable & trim its prefix
            # - default all our settings, especially the mandatory ones
            # - the ip and zookeeper are defaulted to localhost to enable easy testing
            #
            hints = {
                k[8:]: v
                for k, v in env.items() if k.startswith('ochopod_')
            }
            hints.update({
                'fwk': 'marathon',
                'application': env['MARATHON_APP_ID'][1:],
                'task': env['MESOS_TASK_ID'],
                'ports': ports,
            })

            if local or hints['local'] == 'true':

                #
                # - we are running in local mode (e.g on a dev workstation)
                # - default everything to localhost
                #
                logger.info(
                    'running in local mode (make sure you run a standalone zookeeper)'
                )
                hints.update({
                    'node': 'local',
                    'public': '127.0.0.1',
                    'ip': '127.0.0.1',
                    'zk': '127.0.0.1:2181'
                })
            else:

                #
                # - we are (assuming to be) deployed on EC2
                # - get our underlying metadata using curl
                #
                def _peek(token):
                    code, lines = shell(
                        'curl -f http://169.254.169.254/latest/meta-data/%s' %
                        token)
                    assert code is 0, 'unable to lookup EC2 metadata for %s (are you running on EC2 ?)' % token
                    return lines[0]

                #
                # - get our local and public IPV4 addresses
                #
                hints['ip'] = _peek('local-ipv4')
                hints['public'] = _peek('public-ipv4')

                #
                # - the "node" will show up as the EC2 instance ID
                #
                hints['node'] = _peek('instance-id')

                #
                # - the underlying /etc/mesos is assumed to be mounted
                # - go in there fetch our zookeeper connection string
                # - warning, the connection string mesos uses is formatted like zk://<ip:port>,..,<ip:port>/mesos
                # - just keep the ip & port part
                #
                code, lines = shell("cat /etc/mesos/zk")
                assert code is 0, 'could not read from /etc/mesos (are you mounting it ?)'
                hints['zk'] = lines[0][5:].split('/')[0]

            #
            # - the cluster must be fully qualified with a namespace (which is defaulted anyway)
            # - if the cluster is not specified use the marathon application identifier as a fallback
            #
            assert hints[
                'namespace'], 'no cluster namespace defined (user error ?)'
            if not hints['cluster']:
                logger.debug(
                    'cluster identified not defined, falling back on %s' %
                    hints['application'])
                hints['cluster'] = hints['application']

            #
            # - start the life-cycle actor which will pass our hints (as a json object) to its underlying sub-process
            # - start our coordinator which will connect to zookeeper and attempt to lead the cluster
            # - upon grabbing the lock the model actor will start and implement the configuration process
            # - the hints are a convenient bag for any data that may change at runtime and needs to be returned (via
            #   the HTTP POST /info request)
            # - what's being registered in zookeeper is immutable though and decorated with additional details by
            #   the coordinator (especially the pod index which is derived from zookeeper)
            #
            latch = ThreadingFuture()
            logger.info('starting %s.%s (marathon/ec2) @ %s' %
                        (hints['namespace'], hints['cluster'], hints['node']))
            breadcrumbs = deepcopy(hints)
            env.update({'ochopod': json.dumps(hints)})
            executor = lifecycle.start(env, latch, hints)
            coordinator = Coordinator.start(hints['zk'].split(','),
                                            hints['namespace'],
                                            hints['cluster'],
                                            int(hints['port']), breadcrumbs,
                                            model, hints)

            #
            # - external hook forcing a coordinator reset
            # - this will force a re-connection to zookeeper and pod registration
            # - please note this will not impact the pod lifecycle (e.g the underlying sub-process will be
            #   left running)
            #
            @web.route('/reset', methods=['POST'])
            def _reset():
                coordinator.tell({'request': 'reset'})
                return '{}', 200

            #
            # - external hook exposing information about our pod
            # - this is a subset of what's registered in zookeeper at boot-time
            # - the data is dynamic and updated from time to time by the model and executor actors
            #
            @web.route('/info', methods=['POST'])
            def _info():
                keys = \
                    [
                        'application',
                        'ip',
                        'node',
                        'port',
                        'ports',
                        'process',
                        'public',
                        'state',
                        'task'
                    ]

                subset = dict(filter(lambda i: i[0] in keys,
                                     hints.iteritems()))
                return json.dumps(subset), 200

            #
            # - external hook exposing our circular log
            # - reverse and dump ochopod.log as a json array
            #
            @web.route('/log', methods=['POST'])
            def _log():
                with open(ochopod.LOG, 'r+') as log:
                    lines = [line for line in log]
                    return json.dumps(lines), 200

            #
            # - web-hook used to receive requests from the leader or the CLI tools
            # - those requests are passed down to the executor actor
            # - any non HTTP 200 response is a failure
            # - failure to acknowledge within the specified timeout will result in a HTTP 408 (REQUEST TIMEOUT)
            # - attempting to send a control request to a dead pod will result in a HTTP 410 (GONE)
            #
            @web.route('/control/<task>', methods=['POST'])
            @web.route('/control/<task>/<timeout>', methods=['POST'])
            def _control(task, timeout='60'):
                try:

                    ts = time.time()
                    logger.debug('http in -> /control/%s' % task)
                    latch = ThreadingFuture()
                    executor.tell({
                        'request': task,
                        'latch': latch,
                        'data': request.data
                    })
                    code = latch.get(timeout=int(timeout))
                    ms = time.time() - ts
                    logger.debug('http out -> HTTP %s (%d ms)' % (code, ms))
                    return '{}', code

                except Timeout:

                    #
                    # - we failed to match the specified timeout
                    # - gracefully fail on a HTTP 408
                    #
                    return '{}', 408

                except ActorDeadError:

                    #
                    # - the executor has been shutdown (probably after a /control/kill)
                    # - gracefully fail on a HTTP 410
                    #
                    return '{}', 410

            #
            # - internal hook required to shutdown the web-server
            # - it's not possible to do it outside of a request handler
            # - make sure this calls only comes from localhost (todo)
            #
            @web.route('/terminate', methods=['POST'])
            def _terminate():
                request.environ.get('werkzeug.server.shutdown')()
                return '{}', 200

            class _Runner(threading.Thread):
                """
                Run werkzeug from a separate thread to avoid blocking the main one. We'll have to shut it down
                using a dedicated HTTP POST.
                """
                def run(self):
                    web.run(host='0.0.0.0',
                            port=int(hints['port']),
                            threaded=True)

            try:

                #
                # - block on the lifecycle actor until it goes down (usually after a /control/kill request)
                #
                _Runner().start()
                spin_lock(latch)
                logger.debug('pod is dead, idling')

                #
                # - simply idle forever (since the framework would restart any container that terminates)
                # - /log and /hints HTTP requests will succeed (and show the pod as being killed)
                # - any control request will now fail
                #
                while 1:
                    time.sleep(60.0)

            finally:

                #
                # - when we exit the block first shutdown our executor (which may probably be already down)
                # - then shutdown the coordinator to un-register from zookeeper
                # - finally ask werkzeug to shutdown via a REST call
                #
                shutdown(executor)
                shutdown(coordinator)
                post('http://127.0.0.1:%s/terminate' % env['ochopod_port'])

        except KeyboardInterrupt:

            logger.fatal('CTRL-C pressed')

        except Exception as failure:

            logger.fatal('unexpected condition -> %s' % diagnostic(failure))