def run(componentConfig, templateXml, pipelineConfig, tmpDir):
    """
    Build a workflow instance from a template and run it.

    This is a generator that yields the results of the helper steps; it is
    presumably driven by Twisted's inlineCallbacks (decorator not visible
    here -- confirm).

    componentConfig - path handed to _addPipelineConfigToComponent
    templateXml     - template handed to _replaceTemplateKeys
    pipelineConfig  - pipeline config merged into the component config
    tmpDir          - directory in which the three scratch files are placed

    The scratch files are removed when the function finishes, whether the
    workflow succeeded or one of the steps raised.
    """
    newComponentConfig = os.path.join(tmpDir, 'component-' + global_state.make_ref() + '.conf')
    replacedComponentConfig = os.path.join(tmpDir, 'replacedcomponent-' + global_state.make_ref() + '.conf')
    instanceXml = os.path.join(tmpDir, global_state.make_ref() + '.xml')

    try:
        # First, add pipelineConfig to the componentConfig in the correct place
        _addPipelineConfigToComponent(componentConfig, pipelineConfig, newComponentConfig)
        yield _replaceComponentConfigKeys(newComponentConfig, replacedComponentConfig)
        yield _replaceTemplateKeys(templateXml, replacedComponentConfig, instanceXml)
        yield _runWorkflow(instanceXml)
    finally:
        # A failing step may leave later files uncreated, so only remove
        # what is actually there; this keeps the cleanup from masking the
        # original error with an OSError.
        for scratchFile in (newComponentConfig, replacedComponentConfig, instanceXml):
            if os.path.exists(scratchFile):
                os.unlink(scratchFile)
def run(componentConfig, templateXml, pipelineConfig, tmpDir):
    """
    Build a workflow instance from a template and run it.

    This is a generator that yields the results of the helper steps; it is
    presumably driven by Twisted's inlineCallbacks (decorator not visible
    here -- confirm).

    componentConfig - path handed to _addPipelineConfigToComponent
    templateXml     - template handed to _replaceTemplateKeys
    pipelineConfig  - pipeline config merged into the component config
    tmpDir          - directory in which the three scratch files are placed

    The scratch files are removed when the function finishes, whether the
    workflow succeeded or one of the steps raised.
    """
    newComponentConfig = os.path.join(tmpDir, "component-" + global_state.make_ref() + ".conf")
    replacedComponentConfig = os.path.join(tmpDir, "replacedcomponent-" + global_state.make_ref() + ".conf")
    instanceXml = os.path.join(tmpDir, global_state.make_ref() + ".xml")

    try:
        # First, add pipelineConfig to the componentConfig in the correct place
        _addPipelineConfigToComponent(componentConfig, pipelineConfig, newComponentConfig)
        yield _replaceComponentConfigKeys(newComponentConfig, replacedComponentConfig)
        yield _replaceTemplateKeys(templateXml, replacedComponentConfig, instanceXml)
        yield _runWorkflow(instanceXml)
    finally:
        # A failing step may leave later files uncreated, so only remove
        # what is actually there; this keeps the cleanup from masking the
        # original error with an OSError.
        for scratchFile in (newComponentConfig, replacedComponentConfig, instanceXml):
            if os.path.exists(scratchFile):
                os.unlink(scratchFile)
def handleRunSpotInstances(request):
    """
    Handle a request to start spot instances.

    Reads bid_price, ami, key, instance_type and groups (required) and
    availability_zone, num_instances and user_data (optional) from
    request.body.  When user_data is present it is passed through
    credentials_misc.replaceUserDataVariables and written to a temporary
    file whose path is given to request.credential.runSpotInstances.

    After the instances are started the credentials cache entry for the
    credential is invalidated and the instances, converted with
    instanceToDict, are sent to the request's return_queue.

    The temporary user data file is removed even if starting the instances
    fails.
    """
    userDataFile = None
    try:
        if 'user_data' in request.body:
            userData = credentials_misc.replaceUserDataVariables(request.credential,
                                                                 request.body['user_data'])
            userDataFile = '/tmp/' + global_state.make_ref() + '.conf'
            with open(userDataFile, 'w') as fout:
                fout.write(userData + '\n')

        instances = yield request.credential.runSpotInstances(
            bidPrice=request.body['bid_price'],
            amiId=request.body['ami'],
            key=request.body['key'],
            instanceType=request.body['instance_type'],
            groups=request.body['groups'],
            availabilityZone=request.body.get('availability_zone', None),
            number=request.body.get('num_instances', 1),
            userDataFile=userDataFile,
            log=True)
    finally:
        # The existence check covers the case where open() itself failed
        # after the file name had been chosen.
        if userDataFile and os.path.exists(userDataFile):
            os.remove(userDataFile)

    yield request.state.credentialsCache.invalidate(request.credential.name)

    queue.returnQueueSuccess(request.mq,
                             request.body['return_queue'],
                             [request.credential.instanceToDict(i)
                              for i in instances])

    defer.returnValue(request)
def unsubscribe(self, destination, headers=None, receipt=None):
    """
    Send an unsubscribe for destination and drop the subscription once the
    broker acknowledges it.

    A receipt header is always added to the frame.  When the matching
    receipt id is looked up in self.receipts the stored callable removes
    the subscription from the factory and then, if the caller supplied
    one, the caller's receipt callback is included as well (via
    _iterFuncs).

    headers - optional dict of extra headers for the unsubscribe frame;
              it is copied, not modified.  Any 'receipt' entry in it is
              replaced by the generated receipt id, since that id is the
              key used in self.receipts.
    receipt - optional callable stored alongside the removal callback.
    """
    receiptId = "unsubscribe-" + global_state.make_ref()

    removeSubscription = lambda: self.factory._removeSubscription(destination)
    if receipt:
        self.receipts[receiptId] = _iterFuncs([removeSubscription, receipt])
    else:
        self.receipts[receiptId] = removeSubscription

    # Previously the headers argument was ignored and only the receipt was
    # sent; merge the caller's headers with our receipt header instead.
    frameHeaders = dict(headers or {})
    frameHeaders["receipt"] = receiptId

    self.factory.mqClient.sendMessage(stomper.unsubscribe(destination, headers=frameHeaders))
def createMasterDataFile(cluster, machineConf):
    """
    Creates a master data file as the perl start_cluster works

    The template named by cluster.master_user_data_tmpl has three
    placeholders filled in:

      CLUSTER_PRIVATE_KEY - contents of cluster.cluster_private_key
      CLUSTER_PUBLIC_KEY  - output of `ssh-keygen -y -f <private key>`
      MACHINE_CONF        - contents of machineConf with '${' escaped

    The result is written to a new file under general.secure_tmp and the
    path of that file is returned.  The caller is responsible for removing
    the file.
    """
    privateKeyPath = cluster.config('cluster.cluster_private_key')

    with open(cluster.config('cluster.master_user_data_tmpl')) as fin:
        template = fin.read()

    with open(privateKeyPath) as fin:
        clusterPrivateKey = fin.read()

    # ssh-keygen writes the public key to stdout, collect it piece by piece
    keygenOutput = []
    runSingleProgramEx('ssh-keygen -y -f ' + privateKeyPath,
                       keygenOutput.append,
                       None,
                       log=logging.DEBUG)
    clusterPublicKey = ''.join(keygenOutput)

    template = template.replace('<TMPL_VAR NAME=CLUSTER_PRIVATE_KEY>', clusterPrivateKey)
    template = template.replace('<TMPL_VAR NAME=CLUSTER_PUBLIC_KEY>', clusterPublicKey)

    with open(machineConf) as fin:
        machineConfText = fin.read()

    # Need to escape the ${ for bash
    template = template.replace('<TMPL_VAR NAME=MACHINE_CONF>',
                                machineConfText.replace('${', '\\${'))

    outPath = os.path.join(cluster.config('general.secure_tmp'),
                           'master_user_data.%s.sh' % global_state.make_ref())

    # Close the file explicitly so the contents are flushed before the
    # caller reads the path back.
    with open(outPath, 'w') as fout:
        fout.write(template)

    return outPath
def subscribe(self, handler, destination, headers=None, ack="client", receipt=None):
    """
    Record a subscription on the factory and send the subscribe frame.

    handler     - stored with the subscription record
    destination - destination to subscribe to
    headers     - optional headers for the subscribe frame
    ack         - ack mode passed to stomper.subscribe
    receipt     - optional callable; when given, a receipt header is added
                  to the frame and the callable is stored in self.receipts
                  under the generated receipt id
    """
    # Setting receipt to none here because we know it will be fired when we send it now
    # unless something goes bad now..but what are the chances of that?
    record = dict(handler=handler,
                  destination=destination,
                  headers=headers,
                  ack=ack,
                  receipt=None)
    self.factory._subscriptions.append(record)

    if receipt:
        receiptId = "subscribe-" + global_state.make_ref()
        headers = _updateOrCreateDict(headers, {"receipt": receiptId})
        self.receipts[receiptId] = receipt

    frame = stomper.subscribe(destination, ack=ack, headers=headers)
    self.factory.mqClient.sendMessage(frame)
def __init__(self, factory):
    """
    Replay the factory's recorded subscriptions and queued sends.

    Every entry in factory._subscriptions is re-sent as a subscribe frame.
    An entry that still carries a receipt callback gets a receipt header,
    the callback is moved into self.receipts and the entry's receipt is
    cleared.  Afterwards every entry in factory._sends is sent with
    self.send and the list is emptied.
    """
    self.factory = factory
    self.receipts = {}

    for subscription in self.factory._subscriptions:
        pendingReceipt = subscription["receipt"]
        headers = subscription["headers"]

        if pendingReceipt:
            receiptId = "subscribe-" + global_state.make_ref()
            self.receipts[receiptId] = pendingReceipt
            headers = _updateOrCreateDict(headers, {"receipt": receiptId})
            # We only want to call this the first time it happens, that way reconnects
            # are invisible to the user
            subscription["receipt"] = None

        frame = stomper.subscribe(subscription["destination"],
                                  ack=subscription["ack"],
                                  headers=headers)
        self.factory.mqClient.sendMessage(frame)

    for queued in self.factory._sends:
        self.send(queued["destination"],
                  queued["body"],
                  queued["headers"],
                  queued["receipt"])

    # Once we have sent these remove them
    self.factory._sends = []
def _run(state, batchState):
    """
    Drive one lgt_wrapper batch through its pipeline states.

    This is a generator that yields the results of the client calls; it is
    presumably driven by Twisted's inlineCallbacks (decorator not visible
    here -- confirm).

    The current step is kept in batchState['pipeline_state'].  Each `if`
    block below handles one state, sets the next state and calls
    state.updateBatchState(), so execution falls through the remaining
    states in order and a later call picks up at the recorded state:

      STARTCLUSTER -> REMOTE_LOCAL_TRANSFER -> DECRYPT -> REFERENCE_TRANSFER
      -> RUN_PIPELINE -> RUNNING_PIPELINE -> HARVEST -> SHUTDOWN -> COMPLETED

    state      - provides innerPipelineConfig, parentPipeline,
                 innerPipelineQueue, prerunQueue and updateBatchState
    batchState - dict-like record for this batch, updated in place
    """
    def _markCompleted(step):
        # Add a silent 'Completed <step>' message and advance the progress
        # of the task associated with this batch
        return _updateTask(batchState,
                           lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed ' + step).progress())

    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList('localhost',
                                                        'local',
                                                        'guest',
                                                        batchState['pipeline_name'],
                                                        detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState, 'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(state.innerPipelineConfig(),
                                                            batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState['pipeline_name']
        batchState['pipeline_config']['pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(host='localhost',
                                                         clusterName='local',
                                                         userName='******',
                                                         pipelineName=batchState['pipeline_name'],
                                                         protocol='clovr_wrapper',
                                                         queue='pipeline.q',
                                                         config=batchState['pipeline_config'],
                                                         parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(batchState, 'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0,
                                             numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState, 'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs', global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster(
                    'localhost',
                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                    'guest')
                yield _blockOnTask(terminateTask)
        except Exception as err:
            # This check is best effort: a failure here must not stop the
            # cluster from being started.  Only Exception is caught so that
            # generator shutdown and interpreter exit still propagate.
            _log(batchState,
                 'Could not check for an existing cluster, continuing: ' + str(err))

        batchState['cluster_task'] = yield startCluster(
            batchState,
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {'cluster.MASTER_INSTANCE_TYPE': batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
             'cluster.MASTER_BID_PRICE': batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
             'cluster.EXEC_INSTANCE_TYPE': batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
             'cluster.EXEC_BID_PRICE': batchState['pipeline_config']['cluster.EXEC_BID_PRICE']})

        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0,
                                             numTasks=9))

        yield _markCompleted('startcluster')

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster('localhost',
                                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                    'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState, 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _markCompleted('remote_local_transfer')

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster('localhost',
                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                    'guest')

        tag = yield tags_client.loadTag('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest',
                                        _decryptTagName(batchState))

        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(cluster['master']['public_dns'],
                            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
                            '/mnt/',
                            options=conf('rsync.options'),
                            user=conf('rsync.user'),
                            log=True)

        for f in tag['files']:
            # NOTE(review): the file name and password are joined into the
            # command line without quoting -- confirm they can never contain
            # shell metacharacters.
            decryptCmd = ' '.join([os.path.join('/mnt', os.path.basename(batchState['pipeline_config']['params.DECRYPT_SCRIPT'])),
                                   f,
                                   '-out-dir', '/mnt/lgt_decrypt',
                                   '-remove-encrypted',
                                   '-password', batchState['pipeline_config']['params.DECRYPT_PASSWORD']])

            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        tag = yield tags_client.tagData(host='localhost',
                                        clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        userName='******',
                                        action='overwrite',
                                        tagName=_decryptTagName(batchState),
                                        files=['/mnt/lgt_decrypt'],
                                        metadata={},
                                        recursive=True,
                                        expand=False,
                                        compressDir=None)

        _log(batchState, 'Waiting for tagging of %s to complete - %s' % (_decryptTagName(batchState),
                                                                         tag['task_name']))

        yield _blockOnTask(tag['task_name'],
                           cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _markCompleted('decrypt')

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        # Start every transfer first, then wait for all of them
        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState,
                                      ['vp-transfer-dataset',
                                       '-t',
                                       '--tag-name=' + tag,
                                       '--dst-cluster=' + batchState['pipeline_config']['cluster.CLUSTER_NAME']],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _markCompleted('reference_transfer')

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(batchState)
        pipeline = yield pipelines_client.runPipeline(host='localhost',
                                                      clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                      userName='******',
                                                      parentPipeline=batchState['pipeline_name'],
                                                      bareRun=True,
                                                      queue=state.innerPipelineQueue(),
                                                      config=batchState['pipeline_config'],
                                                      overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _markCompleted('run pipeline')

        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The result of _monitorPipeline is not yielded on
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _markCompleted('running pipeline')

        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer,
                                                batchState)

        yield _markCompleted('harvest')

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])
            except Exception as err:
                # A failed add-instances task is only reported; the cluster
                # is terminated regardless
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster('localhost',
                                               batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                               'guest')

        yield _markCompleted('shutdown')

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()
def randomQueueName(baseName):
    """
    Return a topic name of the form /topic/<baseName>-<ref>, where <ref>
    is a fresh value from global_state.make_ref().
    """
    prefix = '/topic/' + baseName + '-'
    return prefix + global_state.make_ref()
def send(self, destination, body, headers=None, receipt=None):
    """
    Send body to destination.

    headers - optional headers for the send frame
    receipt - optional callable; when given, a receipt header is added to
              the frame and the callable is stored in self.receipts under
              the generated receipt id
    """
    if not receipt:
        frameHeaders = headers
    else:
        receiptId = "send-" + global_state.make_ref()
        frameHeaders = _updateOrCreateDict(headers, {"receipt": receiptId})
        self.receipts[receiptId] = receipt

    frame = stomper.send(destination, body, frameHeaders)
    self.factory.mqClient.sendMessage(frame)
def createMasterDataFile(cluster, machineConf):
    """
    Creates a master data file as the perl start_cluster works

    The template named by cluster.master_user_data_tmpl has three
    placeholders filled in:

      CLUSTER_PRIVATE_KEY - contents of cluster.cluster_private_key
      CLUSTER_PUBLIC_KEY  - output of `ssh-keygen -y -f <private key>`
      MACHINE_CONF        - contents of machineConf with '${' escaped

    The result is written to a new file under general.secure_tmp and the
    path of that file is returned.  The caller is responsible for removing
    the file.
    """
    privateKeyPath = cluster.config('cluster.cluster_private_key')

    with open(cluster.config('cluster.master_user_data_tmpl')) as fin:
        template = fin.read()

    with open(privateKeyPath) as fin:
        clusterPrivateKey = fin.read()

    # ssh-keygen writes the public key to stdout, collect it piece by piece
    keygenOutput = []
    runSingleProgramEx('ssh-keygen -y -f ' + privateKeyPath,
                       keygenOutput.append,
                       None,
                       log=logging.DEBUG)
    clusterPublicKey = ''.join(keygenOutput)

    template = template.replace('<TMPL_VAR NAME=CLUSTER_PRIVATE_KEY>', clusterPrivateKey)
    template = template.replace('<TMPL_VAR NAME=CLUSTER_PUBLIC_KEY>', clusterPublicKey)

    with open(machineConf) as fin:
        machineConfText = fin.read()

    # Need to escape the ${ for bash
    template = template.replace('<TMPL_VAR NAME=MACHINE_CONF>',
                                machineConfText.replace('${', '\\${'))

    outPath = os.path.join(cluster.config('general.secure_tmp'),
                           'master_user_data.%s.sh' % global_state.make_ref())

    # Close the file explicitly so the contents are flushed before the
    # caller reads the path back.
    with open(outPath, 'w') as fout:
        fout.write(template)

    return outPath
def _run(state, batchState):
    """
    Drive one clovr_wrapper batch through its pipeline states.

    This is a generator that yields the results of the client calls; it is
    presumably driven by Twisted's inlineCallbacks (decorator not visible
    here -- confirm).

    The current step is kept in batchState['pipeline_state'].  Each `if`
    block below handles one state, sets the next state and calls
    state.updateBatchState(), so execution falls through the remaining
    states in order:

      PRESTART -> STARTING -> PRERUN -> RUN_PIPELINE -> RUNNING_PIPELINE
      -> POSTRUN -> COMPLETED

    state      - provides innerPipelineConfig, workflowConfig,
                 parentPipeline, innerPipelineQueue, prerunQueue,
                 postrunQueue and updateBatchState
    batchState - dict-like record for this batch, updated in place
    """
    def _stepDone(step):
        # Add a silent 'Completed <step>' message and advance the progress
        # of the task associated with this batch
        return _updateTask(batchState,
                           lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed ' + step).progress())

    if 'state' in batchState:
        _log(batchState, 'Pipeline run before, loading pipeline information')
        pipeline = yield pipelines_client.pipelineList('localhost',
                                                       'local',
                                                       'guest',
                                                       batchState['pipeline_name'],
                                                       detail=True)
    else:
        _log(batchState, 'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(state.innerPipelineConfig(),
                                                            batchState['actions'])
        batchState['pipeline_state'] = PRESTART_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState['pipeline_name']
        batchState['pipeline_config']['pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['clovr_wrapper_task_name'] = pipeline['task_name']

        _log(batchState, 'Setting number of tasks to 6 (number in a standard clovr_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=6))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR,
                                      'pipeline_configs',
                                      global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == PRESTART_STATE:
        _log(batchState, 'Pipeline is in PRESTART state')
        yield state.prerunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.PRESTART_TEMPLATE_XML'],
            pipelineConfigFile,
            TMP_DIR)

        yield _stepDone('prestart')

        batchState['pipeline_state'] = STARTING_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == STARTING_STATE:
        _log(batchState, 'Pipeline is in STARTING state')

        clusterOptions = {
            'cluster.master_type': batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
            'cluster.master_bid_price': batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
            'cluster.exec_type': batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
            'cluster.exec_bid_price': batchState['pipeline_config']['cluster.EXEC_BID_PRICE'],
        }
        clusterTask = yield clusters_client.startCluster(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            clusterOptions)

        taskState = yield tasks.blockOnTask('localhost',
                                            'local',
                                            clusterTask)

        # Anything other than a completed start task is fatal for this run
        if taskState != tasks.task.TASK_COMPLETED:
            raise TaskError(clusterTask)

        yield _stepDone('start')

        batchState['pipeline_state'] = PRERUN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == PRERUN_STATE:
        _log(batchState, 'Pipeline is in PRERUN state')
        yield state.prerunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.PRERUN_TEMPLATE_XML'],
            pipelineConfigFile,
            TMP_DIR)

        yield _stepDone('prerun')

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        pipeline = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _stepDone('run pipeline')

        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The result of _monitorPipeline is not yielded on
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _stepDone('running pipeline')

        batchState['pipeline_state'] = POSTRUN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == POSTRUN_STATE:
        _log(batchState, 'Pipeline is in POSTRUN state')
        yield state.postrunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.POSTRUN_TEMPLATE_XML'],
            pipelineConfigFile,
            TMP_DIR)

        yield _stepDone('postrun')

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()

    # NOTE(review): these two statements are taken to be at function level
    # rather than inside the POSTRUN block -- confirm against the
    # original layout.
    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_COMPLETED))
    _log(batchState, 'Pipeline finished successfully')
def startMaster(state, credClient, taskName, cluster):
    """
    Start the master instance of cluster and wait for it to come up.

    This is a generator that yields deferreds; it is presumably driven by
    Twisted's inlineCallbacks (decorator not visible here -- confirm) and
    returns its result through defer.returnValue.

    state      - provides persistManager for loading and saving the cluster
    credClient - credential client used for config, instance updates and
                 terminating failed instances
    taskName   - task that receives the progress messages
    cluster    - the cluster record to start a master for

    The cluster config is layered on top of the credential config, a
    machine conf and master data file are generated and used as user data
    for a single instance, and the cluster is saved with that instance as
    master.  The generated files are removed even if starting the instance
    fails.  The instance is then walked through the wait steps (running,
    SSH, boot, cluster info), saving the cluster after each one.

    Returns the cluster in the RUNNING state.  Raises Error if the master
    did not make it through the wait steps.
    """
    @defer.inlineCallbacks
    def _saveCluster(instances):
        # Refresh the instance information and store the first instance
        # as the master of the persisted cluster
        instances = yield credClient.updateInstances(instances)
        cl = yield state.persistManager.loadCluster(cluster.clusterName,
                                                    cluster.userName)

        cl = cl.setMaster(instances[0])
        yield state.persistManager.saveCluster(cl)
        defer.returnValue(func.Record(succeeded=instances,
                                      failed=[]))

    credConfigMap = yield credClient.credentialConfig()
    credConfig = config.configFromMap(credConfigMap)
    baseConf = config.configFromConfig(cluster.config,
                                       base=credConfig)
    clusterConf = config.configFromMap({'general.ctype': credConfig('general.ctype'),
                                        'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'},
                                       base=baseConf)
    cl = cluster.update(config=clusterConf)

    mode = [vappio_config.MASTER_NODE]
    masterConfFilename = '/tmp/machine.' + global_state.make_ref() + '.conf'
    masterConf = vappio_config.createDataFile(cl.config,
                                              mode,
                                              outFile=masterConfFilename)
    dataFile = None
    try:
        dataFile = vappio_config.createMasterDataFile(cl, masterConf)

        groups = [g.strip() for g in cl.config('cluster.master_groups').split(',')]

        with open(dataFile) as fin:
            userData = fin.read()

        masterInstanceList = yield runInstances(credClient,
                                                cl.config('cluster.ami'),
                                                cl.config('cluster.key'),
                                                cl.config('cluster.master_type'),
                                                groups,
                                                cl.config('cluster.availability_zone', default=None),
                                                cl.config('cluster.master_bid_price', default=None),
                                                1,
                                                1,
                                                userData)

        cl = cl.setMaster(masterInstanceList[0])
        yield state.persistManager.saveCluster(cl)
    finally:
        # The data file is built from the machine conf and the cluster
        # keys, so do not leave either file behind when something above
        # fails.
        os.remove(masterConf)
        if dataFile is not None:
            os.remove(dataFile)

    instances = yield waitForInstances(masterInstanceList,
                                       [updateTask(taskName,
                                                   'Waiting for master'),
                                        waitForState(credClient,
                                                     'running',
                                                     WAIT_FOR_STATE_TRIES),
                                        _saveCluster,
                                        updateTask(taskName,
                                                   'Master in running state'),
                                        waitForSSH(cl.config('ssh.user'),
                                                   cl.config('ssh.options'),
                                                   WAIT_FOR_SSH_TRIES),
                                        _saveCluster,
                                        updateTask(taskName,
                                                   'SSH up'),
                                        waitForBoot('/tmp/startup_complete',
                                                    cl.config('ssh.user'),
                                                    cl.config('ssh.options'),
                                                    WAIT_FOR_BOOT_TRIES),
                                        _saveCluster,
                                        updateTask(taskName,
                                                   'Booted'),
                                        waitForClusterInfo('local',
                                                           'guest',
                                                           WAIT_FOR_SERVICES_TRIES),
                                        _saveCluster,
                                        updateTask(taskName,
                                                   'Cluster info responded')])

    yield credClient.terminateInstances(instances.failed)

    if not instances.succeeded:
        raise Error('Master failed to start')

    # Reload before changing state so the updates saved by _saveCluster
    # during the wait steps are not overwritten with the older local copy
    cl = yield state.persistManager.loadCluster(cl.clusterName,
                                                cl.userName)

    cl = cl.setState(cl.RUNNING)
    yield state.persistManager.saveCluster(cl)

    defer.returnValue(cl)
def _run(state, batchState):
    """
    Drive one lgt_wrapper batch through its pipeline states.

    This is a generator that yields the results of the client calls; it is
    presumably driven by Twisted's inlineCallbacks (decorator not visible
    here -- confirm).

    The current step is kept in batchState['pipeline_state'].  Each `if`
    block below handles one state, sets the next state and calls
    state.updateBatchState(), so execution falls through the remaining
    states in order and a later call picks up at the recorded state:

      STARTCLUSTER -> REMOTE_LOCAL_TRANSFER -> DECRYPT -> REFERENCE_TRANSFER
      -> RUN_PIPELINE -> RUNNING_PIPELINE -> HARVEST -> SHUTDOWN -> COMPLETED

    state      - provides innerPipelineConfig, parentPipeline,
                 innerPipelineQueue, prerunQueue and updateBatchState
    batchState - dict-like record for this batch, updated in place
    """
    def _markCompleted(step):
        # Add a silent 'Completed <step>' message and advance the progress
        # of the task associated with this batch
        return _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed ' + step).progress())

    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList(
            'localhost',
            'local',
            'guest',
            batchState['pipeline_name'],
            detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState,
             'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(
            state.innerPipelineConfig(), batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(
            batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState[
            'pipeline_name']
        batchState['pipeline_config'][
            'pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(
            batchState,
            'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState,
         'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs',
                                      global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster(
                'localhost',
                batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster(
                    'localhost',
                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                    'guest')
                yield _blockOnTask(terminateTask)
        except Exception as err:
            # This check is best effort: a failure here must not stop the
            # cluster from being started.  Only Exception is caught so that
            # generator shutdown and interpreter exit still propagate.
            _log(batchState,
                 'Could not check for an existing cluster, continuing: ' +
                 str(err))

        batchState['cluster_task'] = yield startCluster(
            batchState,
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {
                'cluster.MASTER_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
                'cluster.MASTER_BID_PRICE':
                batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
                'cluster.EXEC_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
                'cluster.EXEC_BID_PRICE':
                batchState['pipeline_config']['cluster.EXEC_BID_PRICE']
            })

        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        yield _markCompleted('startcluster')

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState,
                 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _markCompleted('remote_local_transfer')

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        tag = yield tags_client.loadTag(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            _decryptTagName(batchState))

        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(
            cluster['master']['public_dns'],
            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
            '/mnt/',
            options=conf('rsync.options'),
            user=conf('rsync.user'),
            log=True)

        for f in tag['files']:
            # NOTE(review): the file name and password are joined into the
            # command line without quoting -- confirm they can never contain
            # shell metacharacters.
            decryptCmd = ' '.join([
                os.path.join(
                    '/mnt',
                    os.path.basename(batchState['pipeline_config']
                                     ['params.DECRYPT_SCRIPT'])),
                f,
                '-out-dir', '/mnt/lgt_decrypt',
                '-remove-encrypted',
                '-password',
                batchState['pipeline_config']['params.DECRYPT_PASSWORD']
            ])

            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        tag = yield tags_client.tagData(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            action='overwrite',
            tagName=_decryptTagName(batchState),
            files=['/mnt/lgt_decrypt'],
            metadata={},
            recursive=True,
            expand=False,
            compressDir=None)

        _log(
            batchState, 'Waiting for tagging of %s to complete - %s' %
            (_decryptTagName(batchState), tag['task_name']))

        yield _blockOnTask(
            tag['task_name'],
            cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _markCompleted('decrypt')

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        # Start every transfer first, then wait for all of them
        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState, [
                'vp-transfer-dataset', '-t', '--tag-name=' + tag,
                '--dst-cluster=' +
                batchState['pipeline_config']['cluster.CLUSTER_NAME']
            ],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _markCompleted('reference_transfer')

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(
            batchState)
        pipeline = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _markCompleted('run pipeline')

        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The result of _monitorPipeline is not yielded on
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _markCompleted('running pipeline')

        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer, batchState)

        yield _markCompleted('harvest')

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']
                                   ['cluster.CLUSTER_NAME'])
            except Exception as err:
                # A failed add-instances task is only reported; the cluster
                # is terminated regardless
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        yield _markCompleted('shutdown')

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()
def startMaster(state, credClient, taskName, cluster):
    """
    Start the master instance of cluster and wait for it to come up.

    This is a generator that yields deferreds; it is presumably driven by
    Twisted's inlineCallbacks (decorator not visible here -- confirm) and
    returns its result through defer.returnValue.

    state      - provides persistManager for loading and saving the cluster
    credClient - credential client used for config, instance updates and
                 terminating failed instances
    taskName   - task that receives the progress messages
    cluster    - the cluster record to start a master for

    The cluster config is layered on top of the credential config, a
    machine conf and master data file are generated and used as user data
    for a single instance, and the cluster is saved with that instance as
    master.  The generated files are removed even if starting the instance
    fails.  The instance is then walked through the wait steps (running,
    SSH, boot, cluster info), saving the cluster after each one.

    Returns the cluster in the RUNNING state.  Raises Error if the master
    did not make it through the wait steps.
    """
    @defer.inlineCallbacks
    def _saveCluster(instances):
        # Refresh the instance information and store the first instance
        # as the master of the persisted cluster
        instances = yield credClient.updateInstances(instances)
        cl = yield state.persistManager.loadCluster(cluster.clusterName, cluster.userName)
        cl = cl.setMaster(instances[0])
        yield state.persistManager.saveCluster(cl)
        defer.returnValue(func.Record(succeeded=instances, failed=[]))

    credConfigMap = yield credClient.credentialConfig()
    credConfig = config.configFromMap(credConfigMap)
    baseConf = config.configFromConfig(cluster.config, base=credConfig)
    clusterConf = config.configFromMap({'general.ctype': credConfig('general.ctype'),
                                        'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'},
                                       base=baseConf)
    cl = cluster.update(config=clusterConf)

    mode = [vappio_config.MASTER_NODE]
    masterConfFilename = '/tmp/machine.' + global_state.make_ref() + '.conf'
    masterConf = vappio_config.createDataFile(cl.config, mode, outFile=masterConfFilename)

    dataFile = None
    try:
        dataFile = vappio_config.createMasterDataFile(cl, masterConf)

        groups = [g.strip() for g in cl.config('cluster.master_groups').split(',')]

        with open(dataFile) as fin:
            userData = fin.read()

        masterInstanceList = yield runInstances(credClient,
                                                cl.config('cluster.ami'),
                                                cl.config('cluster.key'),
                                                cl.config('cluster.master_type'),
                                                groups,
                                                cl.config('cluster.availability_zone', default=None),
                                                cl.config('cluster.master_bid_price', default=None),
                                                1,
                                                1,
                                                userData)

        cl = cl.setMaster(masterInstanceList[0])
        yield state.persistManager.saveCluster(cl)
    finally:
        # The data file is built from the machine conf and the cluster
        # keys, so do not leave either file behind when something above
        # fails.
        os.remove(masterConf)
        if dataFile is not None:
            os.remove(dataFile)

    waitSteps = [updateTask(taskName, 'Waiting for master'),
                 waitForState(credClient, 'running', WAIT_FOR_STATE_TRIES),
                 _saveCluster,
                 updateTask(taskName, 'Master in running state'),
                 waitForSSH(cl.config('ssh.user'), cl.config('ssh.options'), WAIT_FOR_SSH_TRIES),
                 _saveCluster,
                 updateTask(taskName, 'SSH up'),
                 waitForBoot('/tmp/startup_complete', cl.config('ssh.user'), cl.config('ssh.options'), WAIT_FOR_BOOT_TRIES),
                 _saveCluster,
                 updateTask(taskName, 'Booted'),
                 waitForClusterInfo('local', 'guest', WAIT_FOR_SERVICES_TRIES),
                 _saveCluster,
                 updateTask(taskName, 'Cluster info responded')]

    instances = yield waitForInstances(masterInstanceList, waitSteps)

    yield credClient.terminateInstances(instances.failed)

    if not instances.succeeded:
        raise Error('Master failed to start')

    # Reload before changing state so the updates saved by _saveCluster
    # during the wait steps are not overwritten with the older local copy
    cl = yield state.persistManager.loadCluster(cl.clusterName, cl.userName)
    cl = cl.setState(cl.RUNNING)
    yield state.persistManager.saveCluster(cl)

    defer.returnValue(cl)