@defer.inlineCallbacks
def terminateRemoteCluster(request):
    persistManager = request.state.persistManager

    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])

    authToken = auth_token.generateToken(cluster.config('cluster.cluster_public_key'))

    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)

    try:
        if cluster.master:
            wwwTerminateCluster = clusters_client_www.terminateCluster
            remoteTaskName = yield wwwTerminateCluster(cluster.master['public_dns'],
                                                       'local',
                                                       None,
                                                       authToken)

            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            yield tasks_tx.blockOnTaskAndForward('localhost',
                                                 request.body['cluster_name'],
                                                 remoteTaskName,
                                                 localTask)
    except errors.RemoteError, err:
        # If the error is not an auth token one then kill it,
        # otherwise it means we think we own a cluster that
        # we don't.
        #
        # In this case another part of the system is in charge
        # of forgetting about the clusters we shouldn't know.
        if err.name != 'igs.utils.auth_token.AuthTokenError':
            log.err(err)
            yield terminateCluster(credClient,
                                   persistManager,
                                   request.body['cluster_name'],
                                   request.body['user_name'])
        else:
            raise

@defer.inlineCallbacks
def _parseInstances(importedClusters):
    """Parses out all instances associated with this cluster."""
    instances = []

    for cluster in importedClusters:
        master = cluster.get('master')

        if master and master.get('state') == 'running':
            config = json.loads(cluster.get('config'))
            srcCluster = config.get('general.src_cluster')
            clusterKey = config.get('cluster.cluster_public_key')
            authToken = auth_token.generateToken(clusterKey)

            remoteClusters = yield clusters_www.listClusters(master.get('public_dns'),
                                                             {'cluster_name': srcCluster},
                                                             cluster.get('user_name'),
                                                             authToken)
            remoteCluster = _formatLocalHostname(remoteClusters[0])

            instances.extend([instanceFromDict(x)
                              for x in [remoteCluster.get('master'),
                                        remoteCluster.get('exec_nodes')]
                              if x and x.get('state') == 'running'])

    defer.returnValue(instances)

@defer.inlineCallbacks
def loadRemoteCluster(state, cl):
    """
    Tries to load a cluster.  Returns the cluster on success, otherwise
    throws an error.

    If the cluster is not actually owned by us, throws
    auth_token.AuthTokenError.

    If it's unresponsive, throws errors.RemoteError.

    We also check for SSH being up and throw a RemoteError if it is not
    responsive.
    """
    if cl.master:
        authToken = auth_token.generateToken(cl.config('cluster.cluster_public_key'))

        try:
            clusters = yield clusters_www_client.listClusters(cl.master['public_dns'],
                                                              {'cluster_name': 'local'},
                                                              None,
                                                              authToken,
                                                              timeout=10,
                                                              tries=3)
            cluster = clusters[0]

            yield ssh.runProcessSSH(cl.master['public_dns'],
                                    'echo hello',
                                    stdoutf=None,
                                    stderrf=None,
                                    sshUser=state.machineConf('ssh.user'),
                                    sshFlags=state.machineConf('ssh.options'))

            defer.returnValue(cluster)
        except errors.RemoteError, err:
            if err.name == 'igs.utils.auth_token.AuthTokenError':
                raise auth_token.AuthTokenError()
            else:
                raise
        except commands.ProgramRunError:
            raise errors.RemoteError('SSH failed')

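
# A minimal sketch of how a caller might branch on the two failure modes the
# docstring above describes.  _forgetCluster and _markUnresponsive are
# hypothetical placeholders for whatever the surrounding system does in each
# case; only loadRemoteCluster and the exception types come from the code above.
@defer.inlineCallbacks
def _checkRemoteCluster(state, cl):
    try:
        cluster = yield loadRemoteCluster(state, cl)
        defer.returnValue(cluster)
    except auth_token.AuthTokenError:
        # The remote side rejected our token: we do not actually own this
        # cluster, so stop tracking it locally.
        yield _forgetCluster(state, cl)
    except errors.RemoteError, err:
        # The cluster (or its SSH daemon) is unreachable; record that so a
        # periodic cleanup pass can decide what to do with it later.
        log.err(err)
        _markUnresponsive(state, cl)
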
@defer.inlineCallbacks
def updateClusterInfo(state):
    try:
        clusters = yield state.persistManager.loadClustersByAdmin({})
        updateUnresponsiveClusters(state.unresponsiveClusters, clusters)

        # Kinda sloppy to have to create this dictionary each time
        clustersMap = dict([((cluster.clusterName, cluster.userName), cluster)
                            for cluster in clusters])

        for (clusterName, userName), duration in state.unresponsiveClusters.iteritems():
            if duration > CLUSTER_TIMEOUT:
                cluster = clustersMap.get((clusterName, userName))
                log.msg('CLEANUP: Terminating cluster - ' + clusterName)
                authToken = auth_token.generateToken(cluster.config('cluster.cluster_public_key'))
                yield clusters_client.terminateCluster(clusterName, userName, authToken)
    except Exception, err:
        log.err('CLEANUP: Failed')
        log.err(err)

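
# A minimal sketch of the bookkeeping updateClusterInfo relies on, assuming
# updateUnresponsiveClusters maintains the {(clusterName, userName): duration}
# map that the loop above iterates.  _clusterIsResponsive and UPDATE_INTERVAL
# are hypothetical names; the real responsiveness test and timing live in the
# actual helper.
def updateUnresponsiveClusters(unresponsiveClusters, clusters):
    for cluster in clusters:
        key = (cluster.clusterName, cluster.userName)
        if _clusterIsResponsive(cluster):
            # Responsive again, forget any accumulated downtime
            unresponsiveClusters.pop(key, None)
        else:
            # Accumulate how long this cluster has gone without responding
            unresponsiveClusters[key] = unresponsiveClusters.get(key, 0) + UPDATE_INTERVAL
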
#!/usr/bin/env python
##
# Generates an authorization token for cluster communication given a key file
import sys

from igs.utils import auth_token

if len(sys.argv) < 2:
    raise Exception('Must supply key file')

key_file = sys.argv[1]

print auth_token.generateToken(key_file)

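
# The same token generation done programmatically rather than via the script
# above; generateToken and listClusters are used exactly as in the other
# snippets here, while the host and key-file arguments are illustrative only.
@defer.inlineCallbacks
def _listRemoteClusters(remoteHost, keyFile):
    authToken = auth_token.generateToken(keyFile)
    clusters = yield clusters_client_www.listClusters(remoteHost,
                                                      {'cluster_name': 'local'},
                                                      None,
                                                      authToken)
    defer.returnValue(clusters)
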
@defer.inlineCallbacks
def importCluster(state, credClient, taskName, remoteHost, srcCluster, cluster):
    """Handles retrieving metadata from the remote host and running through
    a battery of tests to ensure that the VM being imported is in a running
    state and reachable.
    """
    @defer.inlineCallbacks
    def _saveCluster(instances):
        instances = yield credClient.updateInstances(instances)
        cl = yield state.persistManager.loadCluster(cluster.clusterName,
                                                    cluster.userName)
        cl = cl.setMaster(instances[0])
        yield state.persistManager.saveCluster(cl)
        defer.returnValue(func.Record(succeeded=instances, failed=[]))

    authToken = auth_token.generateToken(cluster.config('cluster.cluster_public_key'))

    remoteClusters = yield clusters_client_www.listClusters(remoteHost,
                                                            {'cluster_name': srcCluster},
                                                            cluster.userName,
                                                            authToken)
    remoteCluster = remoteClusters[0]

    if remoteCluster.get('state') in ['terminated', 'failed']:
        raise Error('Imported cluster in TERMINATED or FAILED state')

    # If we are importing a local cluster the public and private DNS will
    # not be valid hostnames that we can query.  Need to set them to the
    # remote host provided in the import-clusters call
    if 'clovr-' in remoteCluster['master']['public_dns']:
        remoteCluster['master']['public_dns'] = remoteHost
        remoteCluster['master']['private_dns'] = remoteHost

    # Sorta hacky but we need to check whether or not a master node is
    # associated with the cluster being imported before proceeding
    _instances = yield waitForInstances([remoteCluster],
                                        [updateTask(taskName, 'Waiting for populated master node'),
                                         waitForPopulatedMasterNode(srcCluster, authToken, WAIT_FOR_STATE_TRIES)])

    if not _instances.succeeded:
        raise Error('Could not retrieve master node from imported cluster.')

    baseConf = config.configFromMap(cluster.config.conf)
    remoteClusterConf = config.configFromMap({'general.src_cluster': srcCluster},
                                             base=baseConf)

    cl = cluster.update(config=remoteClusterConf)
    cl = cl.setMaster(remoteCluster.get('master'))
    yield state.persistManager.saveCluster(cl)

    log.msg('DEBUG importCluster: About to run tests on master node')
    _instances = yield waitForInstances([remoteCluster.get('master')],
                                        [updateTask(taskName, 'Waiting for master'),
                                         waitForState(credClient, 'running', WAIT_FOR_STATE_TRIES),
                                         _saveCluster,
                                         waitForSSH(cluster.config('ssh.user'),
                                                    cluster.config('ssh.options'),
                                                    WAIT_FOR_SSH_TRIES),
                                         _saveCluster,
                                         updateTask(taskName, 'SSH up'),
                                         updateTask(taskName, 'Master in running state')])

    if not _instances.succeeded:
        raise Error('Failed to import cluster')

    # TODO: Maybe implement another set of checks here on our exec nodes.
    if remoteCluster.get('exec_nodes'):
        cl = cl.addExecNodes(remoteCluster.get('exec_nodes'))
        yield state.persistManager.saveCluster(cl)

    cl = yield state.persistManager.loadCluster(cl.clusterName, cl.userName)
    cl = cl.setState(cl.RUNNING)
    yield state.persistManager.saveCluster(cl)
    defer.returnValue(cl)

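
# The second argument to waitForInstances above is a pipeline of steps; each
# step receives the current list of instances and returns (a Deferred of)
# func.Record(succeeded=[...], failed=[...]), as _saveCluster does.  A
# hypothetical extra step illustrating that contract; the public-DNS check
# itself is illustrative and not part of the real import flow.
def _requirePublicDNS(instances):
    # Split instances on whether they expose a public DNS name
    succeeded = [i for i in instances if i.get('public_dns')]
    failed = [i for i in instances if not i.get('public_dns')]
    return defer.succeed(func.Record(succeeded=succeeded, failed=failed))
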