Esempio n. 1
0
def _setQueue(taskName, batchState):
    """Run set_queue.sh against the cluster master, then remove the
    noautoshutdown marker file on it over SSH.

    Waits on _blockOnTask(taskName) first, then loads the cluster named by
    batchState['pipeline_config']['cluster.CLUSTER_NAME'].

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    yield _blockOnTask(taskName)

    clusterName = batchState['pipeline_config']['cluster.CLUSTER_NAME']
    cluster = yield loadCluster('localhost', clusterName, 'guest')

    setQueueScript = '/opt/clovr_pipelines/workflow/project_saved_templates/clovr_lgt_wrapper/set_queue.sh'

    def _runSetQueue():
        # The master's DNS name is looked up on every attempt, so a lookup
        # error surfaces inside the retried call rather than before it.
        command = [setQueueScript, cluster['master']['public_dns']]
        return _getOutput(batchState, command, log=True)

    # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
    yield defer_utils.tryUntil(10,
                               _runSetQueue,
                               onFailure=defer_utils.sleep(2))

    conf = config.configFromStream(open('/tmp/machine.conf'))

    # Deletes /var/vappio/runtime/noautoshutdown on the master.
    # NOTE(review): the previous comment described this as "remove
    # autoshutdown"; the file being removed is the *no*autoshutdown marker --
    # confirm the intended effect.
    masterHost = cluster['master']['public_dns']
    yield ssh.runProcessSSH(masterHost,
                            'rm -v /var/vappio/runtime/noautoshutdown',
                            stdoutf=None,
                            stderrf=None,
                            sshUser=conf('ssh.user'),
                            sshFlags=conf('ssh.options'),
                            log=True)
Esempio n. 2
0
def runInstances(credClient,
                 ami,
                 key,
                 iType,
                 groups,
                 availZone,
                 bidPrice,
                 minInstances,
                 maxInstances,
                 userData):
    """Start instances through credClient, retrying until at least
    minInstances have been started.

    Spot instances are requested when bidPrice is truthy, regular instances
    otherwise.  Every attempt asks for the number of instances still missing
    to reach maxInstances and accumulates whatever was started; an attempt
    counts as failed while the running total is below minInstances.

    If the retried start ultimately raises, the error is logged and
    termination of the instances started so far is kicked off.

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    def _runInstances(num):
        # Same request either way; only the spot call carries the bid price.
        if bidPrice:
            return credClient.runSpotInstances(bidPrice=bidPrice,
                                               ami=ami,
                                               key=key,
                                               instanceType=iType,
                                               groups=groups,
                                               availabilityZone=availZone,
                                               numInstances=num,
                                               userData=userData)
        else:
            return credClient.runInstances(ami=ami,
                                           key=key,
                                           instanceType=iType,
                                           groups=groups,
                                           availabilityZone=availZone,
                                           numInstances=num,
                                           userData=userData)

    # Accumulates across retry attempts.
    instances = []

    @defer.inlineCallbacks
    def _startInstances():
        # Capture the requested count *before* extending `instances`, so the
        # error message reports what this attempt actually asked for.
        wanted = maxInstances - len(instances)
        startedInstances = yield _runInstances(wanted)
        instances.extend(startedInstances)
        if len(instances) < minInstances:
            raise InstanceStartError('Wanted %d instances got %d' %
                                     (wanted, len(startedInstances)))

    try:
        yield defer_utils.tryUntil(RUN_INSTANCE_TRIES,
                                   _startInstances,
                                   onFailure=defer_utils.sleep(30))
    except Exception as err:
        ## If we got an exception then terminate any instances
        ## that were started.  The last thing we want is to leak
        ## instances.
        ##
        ## This is not completely safe!  We should probably
        ## raise an exception with the started instances in it
        ## and let the caller decide what to do with them
        ##
        ## NOTE(review): the Deferred returned by mapSerial is not
        ## yielded, and nothing visible in this handler re-raises err --
        ## confirm both are intended.
        log.err('Error starting instances')
        log.err(err)
        # func.chunk(5, instances) -- presumably batches of five; confirm.
        defer_utils.mapSerial(lambda iChunk:
                                  credClient.terminateInstances(iChunk),
                              func.chunk(5, instances))
Esempio n. 3
0
    def initialize(self):
        """Create the pipelines cache, register for notifications and queue
        a "load" job for every pipeline returned by the persist manager.

        NOTE(review): the body yields, so this is a generator -- presumably
        wrapped by defer.inlineCallbacks outside this view; confirm.
        """
        def _withCacheId(doc):
            # The cache id is "<user_name>_<pipeline_name>".
            cacheKey = doc["user_name"] + "_" + doc["pipeline_name"]
            return func.updateDict(doc, {"_id": cacheKey})

        self.cache = yield mongo_cache.createCache("pipelines_cache", _withCacheId)

        self.persistManager.addDependent(self)
        self.tagNotify.addDependent(self)

        def _loadEveryPipeline():
            return self.persistManager.loadAllPipelinesByAdmin({})

        # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
        loaded = yield defer_utils.tryUntil(
            10, _loadEveryPipeline, onFailure=defer_utils.sleep(2)
        )
        for loadedPipeline in loaded:
            self.workQueue.add(self._pipelineToDictAndCache, "load", loadedPipeline)
Esempio n. 4
0
def subscribe(mq, state):
    """Start pipeline monitoring, then subscribe the pipeline-list handler.

    _monitorAnyPipelines(mq, state) is run through tryUntil first.  After
    that a handler chain (body-key check, forward to cluster,
    handleWWWPipelineList) is subscribed on mq using the
    'pipelines.list_www' and 'pipelines.concurrent_list' config values.

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    def _startMonitoring():
        return _monitorAnyPipelines(mq, state)

    # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
    yield defer_utils.tryUntil(10,
                               _startMonitoring,
                               onFailure=defer_utils.sleep(2))

    # Build the chain step by step, in the same order the pieces were
    # evaluated when written as a single nested expression.
    bodyKeys = queue.keysInBody(['cluster', 'user_name'])
    forwarder = _forwardToCluster(state.conf,
                                  state.conf('pipelines.list_www'))
    listPipe = defer_pipe.pipe([bodyKeys, forwarder, handleWWWPipelineList])
    respondWithList = queue.returnResponse(listPipe)

    destination = state.conf('pipelines.list_www')
    concurrency = state.conf('pipelines.concurrent_list')
    requestHandler = queue.wrapRequestHandler(state, respondWithList)
    queue.subscribe(mq, destination, concurrency, requestHandler)
Esempio n. 5
0
def subscribe(mq, state):
    """Start pipeline monitoring, then subscribe the pipeline-list handler.

    _monitorAnyPipelines(mq, state) is run through tryUntil first.  After
    that a handler chain (body-key check, forward to cluster,
    handleWWWPipelineList) is subscribed on mq using the
    'pipelines.list_www' and 'pipelines.concurrent_list' config values.

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    def _startMonitoring():
        return _monitorAnyPipelines(mq, state)

    # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
    yield defer_utils.tryUntil(10,
                               _startMonitoring,
                               onFailure=defer_utils.sleep(2))

    # Each stage gets a name; evaluation order matches the nested form.
    stages = [
        queue.keysInBody(['cluster', 'user_name']),
        _forwardToCluster(state.conf, state.conf('pipelines.list_www')),
        handleWWWPipelineList,
    ]
    respondWithList = queue.returnResponse(defer_pipe.pipe(stages))

    destination = state.conf('pipelines.list_www')
    concurrency = state.conf('pipelines.concurrent_list')
    requestHandler = queue.wrapRequestHandler(state, respondWithList)
    queue.subscribe(mq, destination, concurrency, requestHandler)
Esempio n. 6
0
def runInstances(credClient, ami, key, iType, groups, availZone, bidPrice,
                 minInstances, maxInstances, userData):
    """Start instances through credClient, retrying until at least
    minInstances have been started.

    Spot instances are requested when bidPrice is truthy, regular instances
    otherwise.  Every attempt asks for the number of instances still missing
    to reach maxInstances and accumulates whatever was started; an attempt
    counts as failed while the running total is below minInstances.

    If the retried start ultimately raises, the error is logged and
    termination of the instances started so far is kicked off.

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    def _runInstances(num):
        # Same request either way; only the spot call carries the bid price.
        if bidPrice:
            return credClient.runSpotInstances(bidPrice=bidPrice,
                                               ami=ami,
                                               key=key,
                                               instanceType=iType,
                                               groups=groups,
                                               availabilityZone=availZone,
                                               numInstances=num,
                                               userData=userData)
        else:
            return credClient.runInstances(ami=ami,
                                           key=key,
                                           instanceType=iType,
                                           groups=groups,
                                           availabilityZone=availZone,
                                           numInstances=num,
                                           userData=userData)

    # Accumulates across retry attempts.
    instances = []

    @defer.inlineCallbacks
    def _startInstances():
        # Capture the requested count *before* extending `instances`, so the
        # error message reports what this attempt actually asked for.
        wanted = maxInstances - len(instances)
        startedInstances = yield _runInstances(wanted)
        instances.extend(startedInstances)
        if len(instances) < minInstances:
            raise InstanceStartError(
                'Wanted %d instances got %d' %
                (wanted, len(startedInstances)))

    try:
        yield defer_utils.tryUntil(RUN_INSTANCE_TRIES,
                                   _startInstances,
                                   onFailure=defer_utils.sleep(30))
    except Exception as err:
        ## If we got an exception then terminate any instances
        ## that were started.  The last thing we want is to leak
        ## instances.
        ##
        ## This is not completely safe!  We should probably
        ## raise an exception with the started instances in it
        ## and let the caller decide what to do with them
        ##
        ## NOTE(review): the Deferred returned by mapSerial is not
        ## yielded, and nothing visible in this handler re-raises err --
        ## confirm both are intended.
        log.err('Error starting instances')
        log.err(err)
        # func.chunk(5, instances) -- presumably batches of five; confirm.
        defer_utils.mapSerial(
            lambda iChunk: credClient.terminateInstances(iChunk),
            func.chunk(5, instances))
Esempio n. 7
0
    def initialize(self):
        """Create the pipelines cache, register for notifications and queue
        a 'load' job for every pipeline returned by the persist manager.

        NOTE(review): the body yields, so this is a generator -- presumably
        wrapped by defer.inlineCallbacks outside this view; confirm.
        """
        def _withCacheId(doc):
            # The cache id is '<user_name>_<pipeline_name>'.
            cacheKey = doc['user_name'] + '_' + doc['pipeline_name']
            return func.updateDict(doc, {'_id': cacheKey})

        self.cache = yield mongo_cache.createCache('pipelines_cache',
                                                   _withCacheId)

        self.persistManager.addDependent(self)
        self.tagNotify.addDependent(self)

        def _loadEveryPipeline():
            return self.persistManager.loadAllPipelinesByAdmin({})

        # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
        loaded = yield defer_utils.tryUntil(10,
                                            _loadEveryPipeline,
                                            onFailure=defer_utils.sleep(2))
        for loadedPipeline in loaded:
            self.workQueue.add(self._pipelineToDictAndCache,
                               'load',
                               loadedPipeline)
Esempio n. 8
0
def performQueryNoParse(host, url, var, headers=None, timeout=30, tries=4, debug=False):
    """POST `var` as a JSON-encoded 'request' form field to http://<host><url>.

    The request is issued through defer_utils.tryUntil with `tries` as its
    count and a 10-second sleep handler.  Returns the resulting Deferred;
    any failure is logged and then passed on unchanged.

    Raises RetriesFailed immediately (not via the Deferred) when tries == 0.
    `debug` is accepted but not used in this function.
    """
    if tries == 0:
        raise RetriesFailed()

    headers = {} if headers is None else headers

    def _post():
        # Everything is built inside the thunk, so it is recomputed on each
        # attempt and any error here surfaces through the retried call.
        target = ('http://' + host + url).encode('utf_8')
        body = urllib.urlencode({'request': json.dumps(var)})
        allHeaders = func.updateDict(
            headers, {'Content-Type': 'application/x-www-form-urlencoded'})
        return getPage(target,
                       method='POST',
                       postdata=body,
                       headers=allHeaders,
                       connectionTimeout=timeout,
                       timeout=timeout)

    def _logAndForward(failure):
        log.err(failure)
        return failure

    request = defer_utils.tryUntil(tries,
                                   _post,
                                   onFailure=defer_utils.sleep(10))
    request.addErrback(_logAndForward)
    return request
Esempio n. 9
0
def _setQueue(taskName, batchState):
    """Run set_queue.sh against the cluster master, then remove the
    noautoshutdown marker file on it over SSH.

    Waits on _blockOnTask(taskName) first, then loads the cluster named by
    batchState['pipeline_config']['cluster.CLUSTER_NAME'].

    NOTE(review): the body yields, so this is a generator -- presumably
    wrapped by defer.inlineCallbacks outside this view; confirm.
    """
    yield _blockOnTask(taskName)

    clusterName = batchState['pipeline_config']['cluster.CLUSTER_NAME']
    cluster = yield loadCluster('localhost', clusterName, 'guest')

    setQueueScript = '/opt/clovr_pipelines/workflow/project_saved_templates/clovr_lgt_wrapper/set_queue.sh'

    def _runSetQueue():
        # The master's DNS name is looked up on every attempt, so a lookup
        # error surfaces inside the retried call rather than before it.
        command = [setQueueScript, cluster['master']['public_dns']]
        return _getOutput(batchState, command, log=True)

    # Handed to tryUntil with a count of 10 and a 2-second sleep handler.
    yield defer_utils.tryUntil(10,
                               _runSetQueue,
                               onFailure=defer_utils.sleep(2))

    conf = config.configFromStream(open('/tmp/machine.conf'))

    # Deletes /var/vappio/runtime/noautoshutdown on the master.
    # NOTE(review): the previous comment described this as "remove
    # autoshutdown"; the file being removed is the *no*autoshutdown marker --
    # confirm the intended effect.
    masterHost = cluster['master']['public_dns']
    yield ssh.runProcessSSH(masterHost,
                            'rm -v /var/vappio/runtime/noautoshutdown',
                            stdoutf=None,
                            stderrf=None,
                            sshUser=conf('ssh.user'),
                            sshFlags=conf('ssh.options'),
                            log=True)
Esempio n. 10
0
def instantiateCredential(conf, cred):
    """Prepare an EC2-tools credential record for `cred`.

    If neither the converted cert nor key file exists under
    conf('general.secure_tmp'), cred.cert / cred.pkey are written to
    temporary files and converted with nimbusCerts2EC2.py; the temporary
    files are removed afterwards whether or not the conversion succeeded.
    If the cluster public key file exists, the keypair is then added via
    ec2.addKeypair (run through tryUntil with a count of 10).

    Returns a Deferred that fires with the new credential record.  In the
    conversion branch a missing 'ec2_url' or a URL without a port yields a
    failed Deferred.

    NOTE(review): cred.metadata['ec2_url'] is also read unconditionally
    when building the record, so a missing key outside the conversion
    branch is not reported through the Deferred -- confirm that is intended.
    """
    if not conf('config_loaded', default=False):
        # NOTE(review): this stream is never closed explicitly; whether
        # configFromStream consumes it eagerly is not visible here, so it
        # is left untouched.
        conf = config.configFromConfig(conf,
                                       base=config.configFromStream(open(conf('conf_file')),
                                                                    base=conf))

    certFile = os.path.join(conf('general.secure_tmp'), cred.name + '_cert.pem')
    keyFile = os.path.join(conf('general.secure_tmp'), cred.name + '_key.pem')

    mainDeferred = defer.succeed(None)

    if not os.path.exists(certFile) and not os.path.exists(keyFile):
        tmpCertFile = os.path.join(conf('general.secure_tmp'), cred.name + '_cert-tmp.pem')
        tmpKeyFile = os.path.join(conf('general.secure_tmp'), cred.name + '_key-tmp.pem')
        if 'ec2_url' not in cred.metadata:
            return defer.fail(Exception('You must have an ec2_url'))
        parsedUrl = urlparse.urlparse(cred.metadata['ec2_url'])
        if ':' not in parsedUrl.netloc:
            return defer.fail(Exception('Your URL must contain a port'))
        host, port = parsedUrl.netloc.split(':')

        # `with` guarantees the handles are closed even if a write fails.
        with open(tmpCertFile, 'w') as fout:
            fout.write(cred.cert)
        with open(tmpKeyFile, 'w') as fout:
            fout.write(cred.pkey)

        d = commands.runProcess(['nimbusCerts2EC2.py',
                                 '--in-cert=' + tmpCertFile,
                                 '--out-cert=' + certFile,
                                 '--in-key=' + tmpKeyFile,
                                 '--out-key=' + keyFile,
                                 '--java-cert-dir=/tmp',
                                 '--java-cert-host=' + host,
                                 '--java-cert-port=' + port],
                                stdoutf=None,
                                stderrf=None,
                                log=True)

        def _chmod(_exitCode):
            return commands.runProcess(['chmod', '+r', keyFile], stdoutf=None, stderrf=None)

        d.addCallback(_chmod)

        def _unlink(v):
            os.unlink(tmpCertFile)
            os.unlink(tmpKeyFile)
            return v

        # Run the cleanup exactly once on either outcome.  Registering it as
        # a callback and then as an errback re-ran it when the cleanup itself
        # failed, which could replace the real error with a second one.
        d.addBoth(_unlink)

        mainDeferred.addCallback(lambda _ : d)

    ec2Home = cred.metadata.get('ec2_api_tools', '/opt/ec2-api-tools-1.3-57419')
    newCred = func.Record(name=cred.name, conf=conf, cert=certFile, pkey=keyFile, ec2Path=os.path.join(ec2Home, 'bin'),
                          env=dict(EC2_JVM_ARGS='-Djavax.net.ssl.trustStore=/tmp/jssecacerts',
                                   EC2_HOME=ec2Home,
                                   EC2_URL=cred.metadata['ec2_url']))

    pubKeyPath = conf('cluster.cluster_private_key') + '.pub'
    if os.path.exists(pubKeyPath):
        with open(pubKeyPath) as pubKeyFile:
            pubKey = pubKeyFile.read().rstrip()

        def _addKeypair():
            keyPairDefer = ec2.addKeypair(newCred, conf('cluster.key') + '||' + pubKey)

            def _printError(f):
                # Log and pass the failure on so the retry logic still sees it.
                log.msg('Adding keypair failed, retrying')
                log.err(f)
                return f

            keyPairDefer.addErrback(_printError)
            return keyPairDefer

        mainDeferred.addCallback(lambda _ : defer_utils.tryUntil(10, _addKeypair, onFailure=defer_utils.sleep(30)))

    mainDeferred.addCallback(lambda _ : newCred)
    return mainDeferred
Esempio n. 11
0
 def _(*args, **kwargs):
     # Wrapper: defers the actual call to f so that defer_utils.tryUntil can
     # invoke it, passing n as the count, a 30-second sleep handler and
     # retryIfTTLError as the retry predicate.
     def _invoke():
         return f(*args, **kwargs)

     return defer_utils.tryUntil(n,
                                 _invoke,
                                 onFailure=defer_utils.sleep(30),
                                 retry=retryIfTTLError)
Esempio n. 12
0
def instantiateCredential(conf, cred):
    """Prepare an EC2-tools credential record for `cred`.

    If neither the converted cert nor key file exists under
    conf('general.secure_tmp'), cred.cert / cred.pkey are written to
    temporary files and converted with nimbusCerts2EC2.py; the temporary
    files are removed afterwards whether or not the conversion succeeded.
    If the cluster public key file exists, the keypair is then added via
    ec2.addKeypair (run through tryUntil with a count of 10).

    Returns a Deferred that fires with the new credential record.  In the
    conversion branch a missing 'ec2_url' or a URL without a port yields a
    failed Deferred.

    NOTE(review): cred.metadata['ec2_url'] is also read unconditionally
    when building the record, so a missing key outside the conversion
    branch is not reported through the Deferred -- confirm that is intended.
    """
    if not conf('config_loaded', default=False):
        # NOTE(review): this stream is never closed explicitly; whether
        # configFromStream consumes it eagerly is not visible here, so it
        # is left untouched.
        conf = config.configFromConfig(conf,
                                       base=config.configFromStream(open(
                                           conf('conf_file')),
                                                                    base=conf))

    certFile = os.path.join(conf('general.secure_tmp'),
                            cred.name + '_cert.pem')
    keyFile = os.path.join(conf('general.secure_tmp'), cred.name + '_key.pem')

    mainDeferred = defer.succeed(None)

    if not os.path.exists(certFile) and not os.path.exists(keyFile):
        tmpCertFile = os.path.join(conf('general.secure_tmp'),
                                   cred.name + '_cert-tmp.pem')
        tmpKeyFile = os.path.join(conf('general.secure_tmp'),
                                  cred.name + '_key-tmp.pem')
        if 'ec2_url' not in cred.metadata:
            return defer.fail(Exception('You must have an ec2_url'))
        parsedUrl = urlparse.urlparse(cred.metadata['ec2_url'])
        if ':' not in parsedUrl.netloc:
            return defer.fail(Exception('Your URL must contain a port'))
        host, port = parsedUrl.netloc.split(':')

        # `with` guarantees the handles are closed even if a write fails.
        with open(tmpCertFile, 'w') as fout:
            fout.write(cred.cert)
        with open(tmpKeyFile, 'w') as fout:
            fout.write(cred.pkey)

        d = commands.runProcess([
            'nimbusCerts2EC2.py', '--in-cert=' + tmpCertFile,
            '--out-cert=' + certFile, '--in-key=' + tmpKeyFile,
            '--out-key=' + keyFile, '--java-cert-dir=/tmp',
            '--java-cert-host=' + host, '--java-cert-port=' + port
        ],
                                stdoutf=None,
                                stderrf=None,
                                log=True)

        def _chmod(_exitCode):
            return commands.runProcess(['chmod', '+r', keyFile],
                                       stdoutf=None,
                                       stderrf=None)

        d.addCallback(_chmod)

        def _unlink(v):
            os.unlink(tmpCertFile)
            os.unlink(tmpKeyFile)
            return v

        # Run the cleanup exactly once on either outcome.  Registering it as
        # a callback and then as an errback re-ran it when the cleanup itself
        # failed, which could replace the real error with a second one.
        d.addBoth(_unlink)

        mainDeferred.addCallback(lambda _: d)

    ec2Home = cred.metadata.get('ec2_api_tools',
                                '/opt/ec2-api-tools-1.3-57419')
    newCred = func.Record(
        name=cred.name,
        conf=conf,
        cert=certFile,
        pkey=keyFile,
        ec2Path=os.path.join(ec2Home, 'bin'),
        env=dict(EC2_JVM_ARGS='-Djavax.net.ssl.trustStore=/tmp/jssecacerts',
                 EC2_HOME=ec2Home,
                 EC2_URL=cred.metadata['ec2_url']))

    pubKeyPath = conf('cluster.cluster_private_key') + '.pub'
    if os.path.exists(pubKeyPath):
        with open(pubKeyPath) as pubKeyFile:
            pubKey = pubKeyFile.read().rstrip()

        def _addKeypair():
            keyPairDefer = ec2.addKeypair(newCred,
                                          conf('cluster.key') + '||' + pubKey)

            def _printError(f):
                # Log and pass the failure on so the retry logic still sees it.
                log.msg('Adding keypair failed, retrying')
                log.err(f)
                return f

            keyPairDefer.addErrback(_printError)
            return keyPairDefer

        mainDeferred.addCallback(lambda _: defer_utils.tryUntil(
            10, _addKeypair, onFailure=defer_utils.sleep(30)))

    mainDeferred.addCallback(lambda _: newCred)
    return mainDeferred
Esempio n. 13
0
 def _(*args, **kwargs):
     # Wrapper: defers the actual call to f so that defer_utils.tryUntil can
     # invoke it, passing n as the count, a 30-second sleep handler and
     # retryIfTTLError as the retry predicate.
     def _invoke():
         return f(*args, **kwargs)

     return defer_utils.tryUntil(n,
                                 _invoke,
                                 onFailure=defer_utils.sleep(30),
                                 retry=retryIfTTLError)