@defer.inlineCallbacks
def _handleTerminateInstances(request):
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))

    persistManager = request.state.persistManager
    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])
    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)
    if request.body['cluster_name'] != 'local':
        try:
            remoteTaskName = yield clusters_client_www.terminateInstances(
                cluster.master['public_dns'], 'local',
                request.body['user_name'], request.body['by_attribute'],
                request.body['attribute_values'])

            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            yield tasks_tx.blockOnTaskAndForward('localhost',
                                                 request.body['cluster_name'],
                                                 remoteTaskName, localTask)
        except Exception:
            # Fall back to terminating directly if the remote call fails
            yield terminateInstancesByAttribute(
                persistManager, credClient, request.body['cluster_name'],
                request.body['user_name'], request.body['by_attribute'],
                request.body['attribute_values'])

    else:
        yield terminateInstancesByAttribute(
            persistManager, credClient, 'local', None,
            request.body['by_attribute'], request.body['attribute_values'])

    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.progress())

    defer.returnValue(request)
Example #2
@defer.inlineCallbacks
def getOrphans(credentialName, state):
    @defer.inlineCallbacks
    def _sshAble(instance):
        sshUser = state.clustersState.machineConf('ssh.user')
        sshOptions = state.clustersState.machineConf('ssh.options')

        try:
            yield ssh.runProcessSSH(instance['public_dns'], 'echo hello', None,
                                    None, sshUser, sshOptions)
        except Exception:
            # SSH failed; treat the instance as unreachable
            defer.returnValue(False)

        defer.returnValue(True)

    credClient = cred_client.CredentialClient(credentialName, state.mq,
                                              state.clustersState.conf)

    knownInstances = yield getKnownInstances(credentialName)

    instances = yield credClient.listInstances()

    instances = [
        i for i in instances if i['instance_id'] not in knownInstances
    ]

    sshAble = yield defer_utils.mapPar(_sshAble, instances, parallel=5)

    ret = set(
        [i['instance_id'] for canSsh, i in zip(sshAble, instances) if canSsh])

    defer.returnValue(ret)
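
# The defer_utils.mapPar call above is vappio-specific; a rough stand-in can
# be built from stock Twisted primitives, assuming mapPar applies the
# function to every item with a bounded level of parallelism and preserves
# input order (which the zip() above relies on).  A sketch, not the real API:

@defer.inlineCallbacks
def mapPar(func, iterable, parallel=5):
    # DeferredSemaphore caps how many calls run at once; gatherResults
    # fires with the results in the same order as the input list.
    sem = defer.DeferredSemaphore(parallel)
    results = yield defer.gatherResults(
        [sem.run(func, item) for item in iterable])
    defer.returnValue(results)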
Example #3
@defer.inlineCallbacks
def terminateRemoteCluster(request):
    persistManager = request.state.persistManager

    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])
    authToken = auth_token.generateToken(
        cluster.config('cluster.cluster_public_key'))

    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)

    try:
        if cluster.master:
            wwwTerminateCluster = clusters_client_www.terminateCluster
            remoteTaskName = yield wwwTerminateCluster(
                cluster.master['public_dns'], 'local', None, authToken)
            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            yield tasks_tx.blockOnTaskAndForward('localhost',
                                                 request.body['cluster_name'],
                                                 remoteTaskName, localTask)

    except errors.RemoteError as err:
        # If the error is not an auth token error, terminate the cluster
        # anyway; an auth token error means we think we own a cluster
        # that we actually don't.
        #
        # In that case another part of the system is responsible for
        # forgetting about clusters we shouldn't know about.
        if err.name != 'igs.utils.auth_token.AuthTokenError':
            log.err(err)
            yield terminateCluster(credClient, persistManager,
                                   request.body['cluster_name'],
                                   request.body['user_name'])
        else:
            raise
Example #4
@defer.inlineCallbacks
def _handleStartCluster(request):
    persistManager = request.state.persistManager

    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))

    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])

    cluster = cluster.update(startTask=request.body['task_name'])

    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)

    try:
        cluster = yield instance_flow.startMaster(request.state, credClient,
                                                  request.body['task_name'],
                                                  cluster)
    except Exception as err:
        log.err('STARTCLUSTER: Failed')
        log.err(err)
        cluster = yield request.state.persistManager.loadCluster(
            request.body['cluster_name'], request.body['user_name'])
        cluster = cluster.setState(cluster.FAILED)
        yield defer_utils.sleep(120)()
        yield request.state.persistManager.removeCluster(
            request.body['cluster_name'], request.body['user_name'])
        raise err
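
# The defer_utils.sleep(120)() idiom above yields a Deferred that fires after
# the delay.  A minimal sketch of such a helper, assuming it is a thin
# wrapper over the reactor clock (an illustration, not the vappio source):

from twisted.internet import reactor, task

def sleep(seconds):
    # Return a no-argument callable producing a Deferred that fires with
    # None after `seconds` seconds.
    return lambda: task.deferLater(reactor, seconds, lambda: None)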
Example #5
@defer.inlineCallbacks
def _handleTerminateCluster(request):
    # Start task running
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))

    persistManager = request.state.persistManager

    credClient = cred_client.CredentialClient('local', request.mq,
                                              request.state.conf)
    if request.body['cluster_name'] != 'local':
        cluster = yield terminateRemoteCluster(request)
        yield persistManager.saveCluster(cluster)

        removeTerminatedCluster(persistManager, credClient,
                                request.body['cluster_name'],
                                request.body['user_name'])

    else:
        if ('auth_token' in request.body
                and auth_token.validateToken(request.body['auth_token'])):
            yield terminateCluster(credClient, persistManager, 'local',
                                   request.body['user_name'])
            removeTerminatedCluster(persistManager, credClient,
                                    request.body['cluster_name'],
                                    request.body['user_name'])
        else:
            raise auth_token.AuthTokenError()

    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.progress())

    defer.returnValue(request)
Example #6
@defer.inlineCallbacks
def t_credential(state, value, _params):
    """
    Ensure a credential exists
    """
    credClient = cred_client.CredentialClient(value, state.mq, state.conf)

    try:
        yield credClient.listInstances()
        defer.returnValue(value)
    except Exception:
        raise InvalidPipelineValue('"%s" is not a valid credential' %
                                   str(value))
Example #7
@defer.inlineCallbacks
def _handleAddInstances(request):
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))

    cluster = yield request.state.persistManager.loadCluster(
        request.body['cluster'], request.body['user_name'])

    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)

    cType = yield credClient.getCType()

    if cType != 'local':
        if request.body['num_exec'] > 0:
            yield instance_flow.startExecs(
                request.state, credClient, request.body['task_name'],
                request.body['num_exec'],
                request.body.get('exec_instance_type', None), cluster)

    defer.returnValue(request)
Example #8
@defer.inlineCallbacks
def _handleImportCluster(request):
    """Imports a VM found on a remote host."""
    persistManager = request.state.persistManager

    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))

    cluster = yield request.state.persistManager.loadCluster(
        request.body['dst_cluster'], request.body['user_name'])

    cluster = cluster.update(startTask=request.body['task_name'])

    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)

    try:
        cluster = yield instance_flow.importCluster(
            request.state, credClient, request.body['task_name'],
            request.body['host'], request.body['src_cluster'], cluster)

    except Exception as err:
        if isinstance(err, auth_token.AuthTokenError):
            log.err('IMPORTCLUSTER: Authorization failed')
        else:
            log.err('IMPORTCLUSTER: Failed')
            log.err(err)

        cluster = yield request.state.persistManager.loadCluster(
            request.body['dst_cluster'], request.body['user_name'])
        log.msg('DEBUG importcluster.py: cluster -', cluster)

        cluster = cluster.setState(cluster.FAILED)
        yield defer_utils.sleep(120)()

        log.msg('DEBUG importcluster.py: About to remove cluster')

        yield request.state.persistManager.removeCluster(
            request.body['dst_cluster'], request.body['user_name'])
        raise err
Example #9
@defer.inlineCallbacks
def refreshInstances(mq, state):
    """Refreshes the local cluster's instances (exec + data nodes).

    Any instances that have been terminated, are unresponsive, or have
    disappeared since our last refresh are considered terminated and will
    be removed from the local cluster's instance list.
    """
    persistManager = state.persistManager
    updatedExecNodes = []
    updatedDataNodes = []

    try:
        cluster = yield persistManager.loadCluster('local', None)
        credClient = cred_client.CredentialClient(cluster.credName, mq,
                                                  state.conf)

        # Want to make sure we only are doing this on running or unresponsive
        # clusters; terminated clusters will be cleaned up elsewhere
        if cluster.state in [cluster.RUNNING, cluster.UNRESPONSIVE]:
            clExecNodes = cluster.execNodes
            clDataNodes = cluster.dataNodes

            instances = yield credClient.updateInstances(clExecNodes +
                                                         clDataNodes)
            updatedExecNodes.extend([x for x in instances if x in clExecNodes])
            updatedDataNodes.extend([x for x in instances if x in clDataNodes])

            cluster = cluster.updateExecNodes(updatedExecNodes)
            cluster = cluster.updateDataNodes(updatedDataNodes)

            yield persistManager.saveCluster(cluster)
    except Exception as err:
        log.msg('INSTANCES REFRESH: Error')
        log.err(err)

    reactor.callLater(INSTANCE_REFRESH_FREQUENCY, refreshInstances, mq, state)
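
# Because refreshInstances reschedules itself with reactor.callLater, a
# service only needs to invoke it once at startup to begin the polling
# loop, e.g. (sketch; mq and state are whatever the service already holds):
#
#     reactor.callLater(0, refreshInstances, mq, state)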
Example #10
@defer.inlineCallbacks
def terminateInstances(credentialName, instances, state):
    credClient = cred_client.CredentialClient(credentialName, state.mq,
                                              state.clustersState.conf)

    yield credClient.terminateInstances(instances)
Example #11
from twisted.application import service
from twisted.python import log

from igs.utils import config

from vappio_tx.mq import client
from vappio_tx.internal_client import credentials

conf = config.configFromStream(open('/mnt/vappio-conf/vappio_apps.conf'))

application = service.Application('test')

s1 = client.makeService(conf)
s1.setServiceParent(application)

cc = credentials.CredentialClient('diag', s1.mqFactory, conf)

d = cc.listInstances()


def _terminate(instances):
    print 'Num instances:', len(instances)
    instances = instances[:3]
    print 'Shutting down: ', instances
    return cc.terminateInstances(instances)


def _print(foo):
    print 'Foo:', foo
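
# The snippet stops before wiring the callbacks onto the Deferred; the
# original ending is not shown, so this completion is an assumption:
d.addCallback(_terminate)
d.addCallback(_print)
d.addErrback(log.err)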

Example #12
@defer.inlineCallbacks
def loadLocalCluster(mq, state):
    """
    If local cluster is not present, load it
    """
    def _credential():
        if os.path.exists('/tmp/cred-info'):
            # cred-info is a single tab-separated record:
            #   cert-path, pkey-path, ctype, metadata
            cert, pkey, ctype, metadata = open(
                '/tmp/cred-info').read().strip().split('\t')
            return {
                'name': 'local',
                'desc': 'Local credential',
                'ctype': ctype,
                'cert': open(cert).read(),
                'pkey': open(pkey).read(),
                'metadata': (metadata and
                             dict([v.split('=', 1)
                                   for v in metadata.split(',')]) or {}),
                'conf': config.configFromStream(open('/tmp/machine.conf'),
                                                lazy=True)
            }
        else:
            return {
                'name': 'local',
                'desc': 'Local credential',
                'ctype': 'local',
                'cert': None,
                'pkey': None,
                'metadata': {},
                'conf': config.configFromMap({})
            }

    try:
        cluster = yield state.persistManager.loadCluster('local', None)

        baseConf = config.configFromStream(open('/tmp/machine.conf'),
                                           base=config.configFromEnv())

        conf = config.configFromMap(
            {
                'config_loaded': True,
                'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'
            },
            base=baseConf)

        if (cluster.credName == 'local' and
                conf('MASTER_IP') not in [cluster.master['public_dns'],
                                          cluster.master['private_dns']]):
            master = dict(instance_id='local',
                          ami_id=None,
                          public_dns=conf('MASTER_IP'),
                          private_dns=conf('MASTER_IP'),
                          state='running',
                          key=None,
                          index=None,
                          instance_type=None,
                          launch=None,
                          availability_zone=None,
                          monitor=None,
                          spot_request_id=None,
                          bid_price=None)
            cluster = cluster.setMaster(master).update(config=conf)
            yield state.persistManager.saveCluster(cluster)

        defer.returnValue(cluster)
    except persist.ClusterNotFoundError:
        credential = _credential()

        credTaskName = yield cred_client.saveCredential(
            credential['name'], credential['desc'], credential['ctype'],
            credential['cert'], credential['pkey'], credential['metadata'],
            credential['conf'])

        ## Wait for credential to be added.
        ## TODO: Should handle failure here
        yield tasks_tx.blockOnTask('localhost', 'local', credTaskName)

        credClient = cred_client.CredentialClient('local', mq, state.conf)

        ## If it isn't a local ctype then we need to wait for
        ## the credential to come alive
        if credential['ctype'] != 'local':
            instances = yield credClient.listInstances()
        else:
            instances = []

        baseConf = config.configFromStream(open('/tmp/machine.conf'),
                                           base=config.configFromEnv())
        conf = config.configFromMap(
            {
                'config_loaded': True,
                'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'
            },
            base=baseConf)
        cluster = persist.Cluster('local', None, 'local', conf)

        startTaskName = yield tasks_tx.createTaskAndSave('startCluster', 1)
        yield tasks_tx.updateTask(
            startTaskName,
            lambda t: t.setState(tasks_tx.task.TASK_COMPLETED).progress())

        cluster = cluster.update(startTask=startTaskName)

        masterIp = cluster.config('MASTER_IP')
        masterIdx = func.find(
            lambda i: masterIp in [i['public_dns'], i['private_dns']],
            instances)

        if masterIdx is not None:
            master = instances[masterIdx]
        else:
            master = dict(instance_id='local',
                          ami_id=None,
                          public_dns=masterIp,
                          private_dns=masterIp,
                          state='running',
                          key=None,
                          index=None,
                          instance_type=None,
                          launch=None,
                          availability_zone=None,
                          monitor=None,
                          spot_request_id=None,
                          bid_price=None)

        cluster = cluster.setMaster(master)
        cluster = cluster.setState(cluster.RUNNING)
        yield state.persistManager.saveCluster(cluster)
        defer.returnValue(cluster)
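
# For reference, _credential() above expects /tmp/cred-info to be a single
# tab-separated record (the concrete values below are made up):
#
#     /path/to/cert.pem<TAB>/path/to/key.pem<TAB>ec2<TAB>k1=v1,k2=v2
#
# cert and pkey are paths whose contents are read in, ctype names the
# credential type, and metadata is a comma-separated key=value list
# (possibly empty).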