Example #1
def terminateRemoteCluster(request):
    persistManager = request.state.persistManager

    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])
    authToken = auth_token.generateToken(
        cluster.config('cluster.cluster_public_key'))

    credClient = cred_client.CredentialClient(cluster.credName, request.mq,
                                              request.state.conf)

    try:
        if cluster.master:
            wwwTerminateCluster = clusters_client_www.terminateCluster
            remoteTaskName = yield wwwTerminateCluster(
                cluster.master['public_dns'], 'local', None, authToken)
            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            yield tasks_tx.blockOnTaskAndForward('localhost',
                                                 request.body['cluster_name'],
                                                 remoteTaskName, localTask)

    except errors.RemoteError, err:
        # If the error is not an auth-token error, terminate the cluster;
        # otherwise it means we think we own a cluster that we don't.
        #
        # In that case another part of the system is in charge of
        # forgetting about the clusters we shouldn't know about.
        if err.name != 'igs.utils.auth_token.AuthTokenError':
            log.err(err)
            yield terminateCluster(credClient, persistManager,
                                   request.body['cluster_name'],
                                   request.body['user_name'])
        else:
            raise
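
The yields above imply that terminateRemoteCluster runs as a Twisted inlineCallbacks generator, with each yield waiting on a Deferred. A minimal sketch of that wrapping, under that assumption (exampleHandler is a hypothetical name, not from the source):

from twisted.internet import defer

@defer.inlineCallbacks
def exampleHandler(request):
    # Each yield suspends the generator until the Deferred fires,
    # mirroring the persistManager call in the example above.
    cluster = yield request.state.persistManager.loadCluster(
        request.body['cluster_name'], request.body['user_name'])
    defer.returnValue(cluster)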
Example #2
def terminateRemoteCluster(request):
    persistManager = request.state.persistManager

    cluster = yield persistManager.loadCluster(request.body["cluster_name"], request.body["user_name"])
    authToken = auth_token.generateToken(cluster.config("cluster.cluster_public_key"))

    credClient = cred_client.CredentialClient(cluster.credName, request.mq, request.state.conf)

    try:
        if cluster.master:
            wwwTerminateCluster = clusters_client_www.terminateCluster
            remoteTaskName = yield wwwTerminateCluster(cluster.master["public_dns"], "local", None, authToken)
            localTask = yield tasks_tx.loadTask(request.body["task_name"])
            yield tasks_tx.blockOnTaskAndForward("localhost", request.body["cluster_name"], remoteTaskName, localTask)

    except errors.RemoteError, err:
        # If the error is not an auth-token error, terminate the cluster;
        # otherwise it means we think we own a cluster that we don't.
        #
        # In that case another part of the system is in charge of
        # forgetting about the clusters we shouldn't know about.
        if err.name != "igs.utils.auth_token.AuthTokenError":
            log.err(err)
            yield terminateCluster(credClient, persistManager, request.body["cluster_name"], request.body["user_name"])
        else:
            raise
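
The err.name comparison above suggests that errors.RemoteError carries the dotted name of the exception type raised on the remote side. A minimal illustrative stand-in for that idea (this class is an assumption, not the project's actual implementation):

class RemoteError(Exception):
    """Illustrative only: wraps an error reported by a remote cluster."""
    def __init__(self, name, msg=''):
        Exception.__init__(self, msg)
        # Dotted path of the remote exception type, e.g.
        # 'igs.utils.auth_token.AuthTokenError'
        self.name = name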
Example #3
    @defer.inlineCallbacks
    def _parseInstances(importedClusters):
        """Parses out all instances associated with this cluster."""
        instances = []
        
        for cluster in importedClusters:
            master = cluster.get('master')

            if master and master.get('state') == 'running':
                config = json.loads(cluster.get('config'))
                srcCluster = config.get('general.src_cluster')

                clusterKey = config.get('cluster.cluster_public_key')
                authToken = auth_token.generateToken(clusterKey)

                remoteClusters = yield clusters_www.listClusters(master.get('public_dns'),
                                                                 {'cluster_name':  srcCluster},
                                                                 cluster.get('user_name'),
                                                                 authToken)
                remoteCluster = _formatLocalHostname(remoteClusters[0])

                # exec_nodes appears to hold a list of node dicts (see the
                # addExecNodes usage elsewhere), so flatten it in with the
                # master dict before filtering on state.
                nodes = [remoteCluster.get('master')] + (remoteCluster.get('exec_nodes') or [])
                instances.extend([instanceFromDict(x) for x
                                  in nodes
                                  if x and x.get('state') == 'running'])

        defer.returnValue(instances)
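
With the defer.inlineCallbacks decorator, _parseInstances returns a Deferred that fires with the collected instance list. A minimal sketch of consuming that Deferred (handleInstances and the importedClusters variable are hypothetical):

def handleInstances(instances):
    # Hypothetical consumer: just print each running instance.
    for instance in instances:
        print instance

_parseInstances(importedClusters).addCallback(handleInstances)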
Example #4
def loadRemoteCluster(state, cl):
    """
    Tries to load a cluster.  Returns the cluster on success
    otherwise throws an error.

    If the cluster is not actually owned by us throws
    auth_token.AuthTokenError

    If it's unresponsive throws
    errors.RemoteError

    We also check for SSH being up and throw a RemoteError
    if it is not responsive
    """
    if cl.master:
        authToken = auth_token.generateToken(
            cl.config('cluster.cluster_public_key'))

        try:
            clusters = yield clusters_www_client.listClusters(
                cl.master['public_dns'], {'cluster_name': 'local'},
                None,
                authToken,
                timeout=10,
                tries=3)

            cluster = clusters[0]

            yield ssh.runProcessSSH(cl.master['public_dns'],
                                    'echo hello',
                                    stdoutf=None,
                                    stderrf=None,
                                    sshUser=state.machineConf('ssh.user'),
                                    sshFlags=state.machineConf('ssh.options'))

            defer.returnValue(cluster)
        except errors.RemoteError, err:
            if err.name == 'igs.utils.auth_token.AuthTokenError':
                raise auth_token.AuthTokenError()
            else:
                raise
        except commands.ProgramRunError:
            raise errors.RemoteError('SSH failed')
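
A sketch of how a caller might handle the two failure modes the docstring names; the tryLoadRemoteCluster wrapper and its fall-back to None are assumptions, not taken from the source:

@defer.inlineCallbacks
def tryLoadRemoteCluster(state, cl):
    try:
        cluster = yield loadRemoteCluster(state, cl)
        defer.returnValue(cluster)
    except auth_token.AuthTokenError:
        # We do not actually own this cluster; let the caller decide.
        raise
    except errors.RemoteError:
        # Cluster (or its SSH) is unresponsive; report "no cluster" instead.
        defer.returnValue(None)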
Example #5
def loadRemoteCluster(state, cl):
    """
    Tries to load a cluster.  Returns the cluster on success
    otherwise throws an error.

    If the cluster is not actually owned by us throws
    auth_token.AuthTokenError

    If it's unresponsive throws
    errors.RemoteError

    We also check for SSH being up and throw a RemoteError
    if it is not responsive
    """
    if cl.master:
        authToken = auth_token.generateToken(cl.config('cluster.cluster_public_key'))

        try:
            clusters = yield clusters_www_client.listClusters(cl.master['public_dns'],
                                                              {'cluster_name': 'local'},
                                                              None,
                                                              authToken,
                                                              timeout=10,
                                                              tries=3)

            cluster = clusters[0]

            yield ssh.runProcessSSH(cl.master['public_dns'],
                                    'echo hello',
                                    stdoutf=None,
                                    stderrf=None,
                                    sshUser=state.machineConf('ssh.user'),
                                    sshFlags=state.machineConf('ssh.options'))

            defer.returnValue(cluster)
        except errors.RemoteError, err:
            if err.name == 'igs.utils.auth_token.AuthTokenError':
                raise auth_token.AuthTokenError()
            else:
                raise
        except commands.ProgramRunError:
            raise errors.RemoteError('SSH failed')
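
The 'echo hello' call above doubles as an SSH liveness probe. A standalone sketch of the same probe, reusing ssh.runProcessSSH exactly as it is called in the example (the sshIsResponsive name is an assumption):

@defer.inlineCallbacks
def sshIsResponsive(state, host):
    try:
        # Same trivial remote command loadRemoteCluster runs.
        yield ssh.runProcessSSH(host,
                                'echo hello',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=state.machineConf('ssh.user'),
                                sshFlags=state.machineConf('ssh.options'))
        defer.returnValue(True)
    except commands.ProgramRunError:
        defer.returnValue(False)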
Example #6
def updateClusterInfo(state):
    try:
        clusters = yield state.persistManager.loadClustersByAdmin({})
        updateUnresponsiveClusters(state.unresponsiveClusters,
                                   clusters)

        # Kinda sloppy to have to create this dictionary each time
        clustersMap = dict([((cluster.clusterName, cluster.userName), cluster)
                            for cluster in clusters])

        for (clusterName, userName), duration in state.unresponsiveClusters.iteritems():
            if duration > CLUSTER_TIMEOUT:
                cluster = clustersMap.get((clusterName, userName))

                log.msg('CLEANUP: Terminating cluster - ' + clusterName)
                authToken = auth_token.generateToken(cluster.config('cluster.cluster_public_key'))
                yield clusters_client.terminateCluster(clusterName, userName, authToken)
    except Exception, err:
        log.err('CLEANUP: Failed')
        log.err(err)
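
A cleanup pass like updateClusterInfo is presumably re-run on an interval; in Twisted that is commonly done with task.LoopingCall. A sketch under that assumption (the 30-second interval is illustrative, and updateClusterInfo is assumed to be decorated with defer.inlineCallbacks so it returns a Deferred):

from twisted.internet import task

def startClusterCleanup(state, interval=30):
    # Re-run the cleanup pass every `interval` seconds, starting immediately.
    loop = task.LoopingCall(updateClusterInfo, state)
    loop.start(interval, now=True)
    return loop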
Example #7
#!/usr/bin/env python

##
# Generates an authorization token for cluster communication given a file
import sys

from igs.utils import auth_token

if len(sys.argv) < 2:
    raise Exception('Must supply key file')

key_file = sys.argv[1]

print auth_token.generateToken(key_file)
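
The same call the script makes can be reused directly from Python code; the key path below is only an example:

from igs.utils import auth_token

# e.g. the key file referenced by the 'cluster.cluster_public_key' option
token = auth_token.generateToken('/path/to/cluster_public_key')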
Example #8
def importCluster(state, credClient, taskName, remoteHost, srcCluster,
                  cluster):
    """Handles retrieving metadata from the remote host and running through
    a battery of tests to ensure that the VM being imported is in a running 
    state and reachable.
    
    """
    @defer.inlineCallbacks
    def _saveCluster(instances):
        instances = yield credClient.updateInstances(instances)
        cl = yield state.persistManager.loadCluster(cluster.clusterName,
                                                    cluster.userName)

        cl = cl.setMaster(instances[0])
        yield state.persistManager.saveCluster(cl)
        defer.returnValue(func.Record(succeeded=instances, failed=[]))

    authToken = auth_token.generateToken(
        cluster.config('cluster.cluster_public_key'))
    remoteClusters = yield clusters_client_www.listClusters(
        remoteHost, {'cluster_name': srcCluster}, cluster.userName, authToken)
    remoteCluster = remoteClusters[0]

    if remoteCluster.get('state') in ['terminated', 'failed']:
        raise Error('Imported cluster in TERMINATED or FAILED state')

    # If we are importing a local cluster the public and private DNS will
    # not be valid hostnames that we can query. Need to set them to the
    # remote host provided in the import-clusters call
    if 'clovr-' in remoteCluster['master']['public_dns']:
        remoteCluster['master']['public_dns'] = remoteHost
        remoteCluster['master']['private_dns'] = remoteHost

    # Sorta hacky but we need to check whether or not a master node is
    # associated with the cluster being imported before proceeding
    _instances = yield waitForInstances([remoteCluster], [
        updateTask(taskName, 'Waiting for populated master node'),
        waitForPopulatedMasterNode(srcCluster, authToken, WAIT_FOR_STATE_TRIES)
    ])

    if not _instances.succeeded:
        raise Error('Could not retrieve master node from imported cluster.')

    baseConf = config.configFromMap(cluster.config.conf)
    remoteClusterConf = config.configFromMap(
        {'general.src_cluster': srcCluster}, base=baseConf)
    cl = cluster.update(config=remoteClusterConf)

    cl = cl.setMaster(remoteCluster.get('master'))
    yield state.persistManager.saveCluster(cl)

    log.msg('DEBUG importCluster: About to run tests on master node')

    _instances = yield waitForInstances([remoteCluster.get('master')], [
        updateTask(taskName, 'Waiting for master'),
        waitForState(credClient, 'running', WAIT_FOR_STATE_TRIES),
        _saveCluster,
        waitForSSH(cluster.config('ssh.user'), cluster.config('ssh.options'),
                   WAIT_FOR_SSH_TRIES), _saveCluster,
        updateTask(taskName, 'SSH up'),
        updateTask(taskName, 'Master in running state')
    ])

    if not _instances.succeeded:
        raise Error('Failed to import cluster')

    # TODO: Maybe implement another set of checks here on our exec nodes.
    if remoteCluster.get('exec_nodes'):
        cl = cl.addExecNodes(remoteCluster.get('exec_nodes'))
        yield state.persistManager.saveCluster(cl)

    cl = yield state.persistManager.loadCluster(cl.clusterName, cl.userName)
    cl = cl.setState(cl.RUNNING)
    yield state.persistManager.saveCluster(cl)

    defer.returnValue(cl)
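
_saveCluster above returns a func.Record with succeeded and failed lists, which appears to be the contract for the steps passed to waitForInstances. A hypothetical extra step following that contract (whether a Deferred-wrapped result is accepted here is also an assumption):

def checkInstancesRunning(instances):
    # Hypothetical step: split instances by state, mirroring the
    # succeeded/failed Record that _saveCluster returns.
    running = [i for i in instances if i and i.get('state') == 'running']
    notRunning = [i for i in instances if not (i and i.get('state') == 'running')]
    return defer.succeed(func.Record(succeeded=running, failed=notRunning))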
Example #9
def importCluster(state, credClient, taskName, remoteHost, srcCluster, cluster):
    """Handles retrieving metadata from the remote host and running through
    a battery of tests to ensure that the VM being imported is in a running 
    state and reachable.
    
    """
    @defer.inlineCallbacks
    def _saveCluster(instances):
        instances = yield credClient.updateInstances(instances)
        cl = yield state.persistManager.loadCluster(cluster.clusterName,
                                                    cluster.userName)

        cl = cl.setMaster(instances[0])
        yield state.persistManager.saveCluster(cl)
        defer.returnValue(func.Record(succeeded=instances,
                                      failed=[]))
    
    authToken = auth_token.generateToken(cluster.config('cluster.cluster_public_key'))
    remoteClusters = yield clusters_client_www.listClusters(remoteHost,
                                                            {'cluster_name': srcCluster},
                                                            cluster.userName,
                                                            authToken)
    remoteCluster = remoteClusters[0]

    if remoteCluster.get('state') in ['terminated', 'failed']:
        raise Error('Imported cluster in TERMINATED or FAILED state')

    # If we are importing a local cluster the public and private DNS will 
    # not be valid hostnames that we can query. Need to set them to the 
    # remote host provided in the import-clusters call
    if 'clovr-' in remoteCluster['master']['public_dns']:
        remoteCluster['master']['public_dns'] = remoteHost
        remoteCluster['master']['private_dns'] = remoteHost

    # Sorta hacky but we need to check whether or not a master node is 
    # associated with the cluster being imported before proceeding
    _instances = yield waitForInstances([remoteCluster], 
                                        [updateTask(taskName,
                                                    'Waiting for populated master node'),
                                         waitForPopulatedMasterNode(srcCluster,
                                                                    authToken,
                                                                    WAIT_FOR_STATE_TRIES)])
                                                                    
    if not _instances.succeeded:
        raise Error('Could not retrieve master node from imported cluster.')

    baseConf = config.configFromMap(cluster.config.conf)
    remoteClusterConf = config.configFromMap({'general.src_cluster': srcCluster},
                                             base=baseConf)
    cl = cluster.update(config=remoteClusterConf)

    cl = cl.setMaster(remoteCluster.get('master')) 
    yield state.persistManager.saveCluster(cl)

    log.msg('DEBUG importCluster: About to run tests on master node')

    _instances = yield waitForInstances([remoteCluster.get('master')],
                                       [updateTask(taskName,
                                                   'Waiting for master'),
                                        waitForState(credClient,
                                                     'running',
                                                     WAIT_FOR_STATE_TRIES),
                                        _saveCluster,
                                        waitForSSH(cluster.config('ssh.user'),
                                                   cluster.config('ssh.options'),
                                                   WAIT_FOR_SSH_TRIES),
                                        _saveCluster,
                                        updateTask(taskName,
                                                   'SSH up'),
                                        updateTask(taskName,
                                                   'Master in running state')])

    if not _instances.succeeded:
        raise Error('Failed to import cluster')

    # TODO: Maybe implement another set of checks here on our exec nodes.
    if remoteCluster.get('exec_nodes'):
        cl = cl.addExecNodes(remoteCluster.get('exec_nodes'))
        yield state.persistManager.saveCluster(cl)

    cl = yield state.persistManager.loadCluster(cl.clusterName,
                                                cl.userName)
    cl = cl.setState(cl.RUNNING)
    yield state.persistManager.saveCluster(cl)
    
    defer.returnValue(cl)
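
The 'clovr-' hostname rewrite above could also be factored into a small helper; this sketch just restates the logic shown in the example under a hypothetical name:

def _normalizeLocalDns(remoteCluster, remoteHost):
    # Local VMs report 'clovr-*' hostnames that cannot be resolved from
    # outside, so point both DNS entries at the host given to import-clusters.
    master = remoteCluster['master']
    if 'clovr-' in master['public_dns']:
        master['public_dns'] = remoteHost
        master['private_dns'] = remoteHost
    return remoteCluster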