Beispiel #1
0
def _run(state, batchState):
    """
    Drive one lgt_wrapper batch through its stages, resuming where it left off.

    This is a generator: every ``yield`` hands an asynchronous operation back
    to whatever drives the generator and resumes with that operation's result
    (presumably Twisted's ``inlineCallbacks``; the decorator is not visible
    in this block - TODO confirm).

    If ``batchState`` has no ``pipeline_name``, or no pipeline with that name
    is listed, the batch is initialised: the pipeline config is built with
    ``_applyActions``, a local wrapper pipeline is created and the task is
    given 9 steps.  After that the stages below run in order.  Each stage is
    guarded by the current ``batchState['pipeline_state']``, so a resumed
    batch skips the stages it has already finished:

        STARTCLUSTER -> REMOTE_LOCAL_TRANSFER -> DECRYPT ->
        REFERENCE_TRANSFER -> RUN_PIPELINE -> RUNNING_PIPELINE ->
        HARVEST -> SHUTDOWN -> COMPLETED

    At the end of every stage the next state is stored in ``batchState`` and
    ``state.updateBatchState()`` is called (presumably to persist it - confirm).

    Parameters:
        state: object providing ``innerPipelineConfig()``, ``parentPipeline()``,
            ``innerPipelineQueue()``, ``updateBatchState()`` and ``prerunQueue``.
        batchState: mutable mapping holding this batch's bookkeeping; it is
            modified in place.

    Raises:
        Exception: if the cluster is 'unresponsive' when the
            REMOTE_LOCAL_TRANSFER stage starts.  Errors from the yielded
            operations propagate as well, except where noted below.
    """
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList('localhost',
                                                        'local',
                                                        'guest',
                                                        batchState['pipeline_name'],
                                                        detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState, 'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(state.innerPipelineConfig(),
                                                            batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState['pipeline_name']
        batchState['pipeline_config']['pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(host='localhost',
                                                         clusterName='local',
                                                         userName='******',
                                                         pipelineName=batchState['pipeline_name'],
                                                         protocol='clovr_wrapper',
                                                         queue='pipeline.q',
                                                         config=batchState['pipeline_config'],
                                                         parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(batchState, 'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState, 'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t : t.setState(tasks.task.TASK_RUNNING))

    # A fresh config file is written on every invocation, including resumes.
    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs', global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster('localhost',
                                                                       batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                                       'guest')
                yield _blockOnTask(terminateTask)
        except Exception as err:
            # Best effort only: a failure here must not stop the cluster from
            # being started below.  Catching Exception rather than everything
            # lets GeneratorExit and KeyboardInterrupt propagate, which a
            # bare except around a yield would have swallowed.
            _log(batchState,
                 'Could not check for an unresponsive cluster, continuing: %r' % (err,))

        batchState['cluster_task'] = yield startCluster(
            batchState,
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {'cluster.MASTER_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
             'cluster.MASTER_BID_PRICE':
                 batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
             'cluster.EXEC_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
             'cluster.EXEC_BID_PRICE':
                 batchState['pipeline_config']['cluster.EXEC_BID_PRICE']})

        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed startcluster'
                                                  ).progress())

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster('localhost',
                                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                    'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState, 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed remote_local_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster('localhost',
                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                    'guest')

        tag = yield tags_client.loadTag('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest',
                                        _decryptTagName(batchState))

        # Close the file handle as soon as the config has been read.
        # NOTE(review): assumes configFromStream consumes the stream before
        # returning - confirm against its implementation.
        with open('/tmp/machine.conf') as machineConf:
            conf = config.configFromStream(machineConf)

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(cluster['master']['public_dns'],
                            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
                            '/mnt/',
                            options=conf('rsync.options'),
                            user=conf('rsync.user'),
                            log=True)

        # The script was copied into /mnt/ above; its remote path is the same
        # for every file, so build it once.
        decryptScript = os.path.join('/mnt',
                                     os.path.basename(batchState['pipeline_config']['params.DECRYPT_SCRIPT']))

        for f in tag['files']:
            # NOTE(review): the file name and the password are joined into the
            # command unquoted, and the call is made with log=True.  Values
            # containing spaces or shell metacharacters would break the
            # command, and the password may end up in logs - confirm and
            # consider quoting.
            decryptCmd = ' '.join([decryptScript,
                                   f,
                                   '-out-dir', '/mnt/lgt_decrypt',
                                   '-remove-encrypted',
                                   '-password', batchState['pipeline_config']['params.DECRYPT_PASSWORD']])

            # Exit codes 0 and 253 are both accepted for the decrypt script.
            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        # Re-tag the decrypted directory under the same tag name, overwriting it.
        tag = yield tags_client.tagData(host='localhost',
                                        clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        userName='******',
                                        action='overwrite',
                                        tagName=_decryptTagName(batchState),
                                        files=['/mnt/lgt_decrypt'],
                                        metadata={},
                                        recursive=True,
                                        expand=False,
                                        compressDir=None)

        _log(batchState, 'Waiting for tagging of %s to complete - %s' % (_decryptTagName(batchState),
                                                                         tag['task_name']))

        yield _blockOnTask(tag['task_name'],
                           cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed decrypt'
                                                  ).progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()


    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        # Start one transfer per reference tag first, then wait for all of
        # them, so the waits do not delay the remaining submissions.
        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState,
                                      ['vp-transfer-dataset',
                                       '-t',
                                       '--tag-name=' + tag,
                                       '--dst-cluster=' + batchState['pipeline_config']['cluster.CLUSTER_NAME']],
                                      log=True)

            # The command's stripped stdout is used as the task name to wait on.
            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed reference_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()


    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        # The inner pipeline reads its input from the decrypted tag.
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(batchState)
        pipeline = yield pipelines_client.runPipeline(host='localhost',
                                                      clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                      userName='******',
                                                      parentPipeline=batchState['pipeline_name'],
                                                      bareRun=True,
                                                      queue=state.innerPipelineQueue(),
                                                      config=batchState['pipeline_config'],
                                                      overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed run pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # _monitorPipeline's result is not yielded; only _waitForPipeline is
        # waited on.
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed running pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed harvest'
                                                  ).progress())

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            # Wait for a pending add-instances task before terminating; a
            # failure of that task is reported but does not block shutdown.
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])
            except Exception as err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster('localhost',
                                               batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                               'guest')


        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed shutdown'
                                                  ).progress())

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()
Beispiel #2
0
def _run(state, batchState):
    """
    Advance a clovr_wrapper batch through its pipeline stages.

    Generator: each ``yield`` hands an asynchronous call to the driver and
    continues with its result (presumably Twisted's ``inlineCallbacks``; the
    decorator is not visible in this block - TODO confirm).

    A batch without a 'state' entry is initialised first (config built with
    ``_applyActions``, wrapper pipeline created, task given 6 steps).  A batch
    that already has one only looks its pipeline up again.  The stages then
    run in order, each guarded by ``batchState['pipeline_state']`` so that
    finished stages are skipped on a resumed run:

        PRESTART -> STARTING -> PRERUN -> RUN_PIPELINE ->
        RUNNING_PIPELINE -> POSTRUN -> COMPLETED

    Parameters:
        state: object providing ``innerPipelineConfig()``, ``parentPipeline()``,
            ``workflowConfig()``, ``innerPipelineQueue()``,
            ``updateBatchState()``, ``prerunQueue`` and ``postrunQueue``.
        batchState: mutable mapping with this batch's bookkeeping; modified
            in place.

    Raises:
        TaskError: if the start-cluster task does not end in TASK_COMPLETED.
    """
    def _markDone(message):
        # Add a silent progress message to the batch task; the caller yields
        # the returned value.
        return _updateTask(batchState,
                           lambda task : task.addMessage(tasks.task.MSG_SILENT,
                                                         message).progress())

    if 'state' in batchState:
        _log(batchState, 'Pipeline run before, loading pipeline information')
        # The listing is not consulted afterwards, so it is not bound to a
        # name; the call and the wait on it are kept.
        yield pipelines_client.pipelineList('localhost',
                                            'local',
                                            'guest',
                                            batchState['pipeline_name'],
                                            detail=True)
    else:
        _log(batchState, 'First time running, creating pipeline state information')
        appliedConfig = yield _applyActions(state.innerPipelineConfig(),
                                            batchState['actions'])
        batchState['pipeline_config'] = appliedConfig
        batchState['pipeline_state'] = PRESTART_STATE

        # We need to create a fake, local, pipeline for metrics to work
        wrapperConfig = batchState['pipeline_config']
        wrapperName = pipeline_misc.checksumInput(wrapperConfig)
        batchState['pipeline_name'] = wrapperName
        wrapperConfig['pipeline.PIPELINE_NAME'] = wrapperName
        wrapperConfig['pipeline.PIPELINE_WRAPPER_NAME'] = wrapperName

        _log(batchState, 'Pipeline named ' + wrapperName)

        wrapper = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['clovr_wrapper_task_name'] = wrapper['task_name']

        _log(batchState, 'Setting number of tasks to 6 (number in a standard clovr_wrapper)')
        yield _updateTask(batchState,
                          lambda task : task.update(completedTasks=0, numTasks=6))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    yield _updateTask(batchState,
                      lambda task : task.setState(tasks.task.TASK_RUNNING))

    # A new ergatis config file is written on every invocation.
    ergatisConfPath = os.path.join(TMP_DIR,
                                   'pipeline_configs',
                                   global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], ergatisConfPath)

    # --- PRESTART: run the prestart workflow template --------------------
    if batchState['pipeline_state'] == PRESTART_STATE:
        _log(batchState, 'Pipeline is in PRESTART state')
        yield state.prerunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.PRESTART_TEMPLATE_XML'],
            ergatisConfPath,
            TMP_DIR)

        yield _markDone('Completed prestart')

        batchState['pipeline_state'] = STARTING_STATE
        state.updateBatchState()

    # --- STARTING: start the cluster and wait for that task --------------
    if batchState['pipeline_state'] == STARTING_STATE:
        _log(batchState, 'Pipeline is in STARTING state')
        clusterConf = batchState['pipeline_config']
        clusterName = clusterConf['cluster.CLUSTER_NAME']
        execNodes = int(clusterConf['cluster.EXEC_NODES'])
        credential = clusterConf['cluster.CLUSTER_CREDENTIAL']
        instanceSettings = {
            'cluster.master_type': clusterConf['cluster.MASTER_INSTANCE_TYPE'],
            'cluster.master_bid_price': clusterConf['cluster.MASTER_BID_PRICE'],
            'cluster.exec_type': clusterConf['cluster.EXEC_INSTANCE_TYPE'],
            'cluster.exec_bid_price': clusterConf['cluster.EXEC_BID_PRICE'],
        }
        startTask = yield clusters_client.startCluster('localhost',
                                                       clusterName,
                                                       'guest',
                                                       execNodes,
                                                       0,
                                                       credential,
                                                       instanceSettings)

        finalState = yield tasks.blockOnTask('localhost', 'local', startTask)

        if finalState != tasks.task.TASK_COMPLETED:
            raise TaskError(startTask)

        yield _markDone('Completed start')

        batchState['pipeline_state'] = PRERUN_STATE
        state.updateBatchState()

    # --- PRERUN: run the prerun workflow template ------------------------
    if batchState['pipeline_state'] == PRERUN_STATE:
        _log(batchState, 'Pipeline is in PRERUN state')
        yield state.prerunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.PRERUN_TEMPLATE_XML'],
            ergatisConfPath,
            TMP_DIR)

        yield _markDone('Completed prerun')
        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    # --- RUN_PIPELINE: submit the inner pipeline -------------------------
    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        launched = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = launched['task_name']

        yield _markDone('Completed run pipeline')
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    # --- RUNNING_PIPELINE: wait for the inner pipeline -------------------
    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The monitor's result is not yielded; only _waitForPipeline is
        # waited on.
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _markDone('Completed running pipeline')
        batchState['pipeline_state'] = POSTRUN_STATE
        state.updateBatchState()

    # --- POSTRUN: run the postrun workflow template ----------------------
    if batchState['pipeline_state'] == POSTRUN_STATE:
        _log(batchState, 'Pipeline is in POSTRUN state')
        yield state.postrunQueue.addWithDeferred(
            workflow_runner.run,
            state.workflowConfig(),
            batchState['pipeline_config']['pipeline.POSTRUN_TEMPLATE_XML'],
            ergatisConfPath,
            TMP_DIR)

        yield _markDone('Completed postrun')

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()

    yield _updateTask(batchState,
                      lambda task : task.setState(tasks.task.TASK_COMPLETED))
    _log(batchState, 'Pipeline finished successfully')
Beispiel #3
0
def _run(state, batchState):
    """
    Run, or resume, a clovr_wrapper batch as a sequence of guarded stages.

    Generator: every ``yield`` passes an asynchronous operation to the driver
    and resumes with its result (presumably Twisted's ``inlineCallbacks``;
    the decorator is not visible in this block - TODO confirm).

    Without a 'state' key in ``batchState`` the batch is set up from scratch:
    the config comes from ``_applyActions``, a wrapper pipeline named after
    the config checksum is created, and the task gets 6 steps.  Otherwise the
    existing pipeline is only looked up.  The stages below then run in order;
    each one checks ``batchState['pipeline_state']`` and is skipped when the
    batch is already past it:

        PRESTART -> STARTING -> PRERUN -> RUN_PIPELINE ->
        RUNNING_PIPELINE -> POSTRUN -> COMPLETED

    Parameters:
        state: object providing ``innerPipelineConfig()``, ``parentPipeline()``,
            ``workflowConfig()``, ``innerPipelineQueue()``,
            ``updateBatchState()``, ``prerunQueue`` and ``postrunQueue``.
        batchState: mutable mapping with this batch's bookkeeping; modified
            in place.

    Raises:
        TaskError: if the start-cluster task does not end in TASK_COMPLETED.
    """
    firstRun = 'state' not in batchState

    if firstRun:
        _log(batchState,
             'First time running, creating pipeline state information')
        builtConfig = yield _applyActions(state.innerPipelineConfig(),
                                          batchState['actions'])
        batchState['pipeline_config'] = builtConfig
        batchState['pipeline_state'] = PRESTART_STATE

        # We need to create a fake, local, pipeline for metrics to work
        checksumName = pipeline_misc.checksumInput(
            batchState['pipeline_config'])
        batchState['pipeline_name'] = checksumName
        for nameKey in ('pipeline.PIPELINE_NAME',
                        'pipeline.PIPELINE_WRAPPER_NAME'):
            batchState['pipeline_config'][nameKey] = batchState[
                'pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        created = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['clovr_wrapper_task_name'] = created['task_name']

        _log(
            batchState,
            'Setting number of tasks to 6 (number in a standard clovr_wrapper)'
        )
        yield _updateTask(
            batchState,
            lambda task: task.update(completedTasks=0, numTasks=6))

        state.updateBatchState()
    else:
        _log(batchState, 'Pipeline run before, loading pipeline information')
        # The lookup result is never read below, so it is left unnamed; the
        # call and the wait on it are kept.
        yield pipelines_client.pipelineList('localhost',
                                            'local',
                                            'guest',
                                            batchState['pipeline_name'],
                                            detail=True)

    batchState['state'] = RUNNING_STATE

    yield _updateTask(batchState,
                      lambda task: task.setState(tasks.task.TASK_RUNNING))

    # A new ergatis config file is written on every invocation.
    ergatisConfig = os.path.join(TMP_DIR, 'pipeline_configs',
                                 global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], ergatisConfig)

    def _progress(message):
        # Add a silent progress message to the batch task; the caller yields
        # the returned value.
        return _updateTask(
            batchState,
            lambda task: task.addMessage(tasks.task.MSG_SILENT,
                                         message).progress())

    def _runTemplate(queue, templateKey):
        # Queue workflow_runner.run for the template stored under templateKey
        # in the pipeline config; the caller yields the returned value.
        return queue.addWithDeferred(
            workflow_runner.run, state.workflowConfig(),
            batchState['pipeline_config'][templateKey], ergatisConfig,
            TMP_DIR)

    # PRESTART
    if batchState['pipeline_state'] == PRESTART_STATE:
        _log(batchState, 'Pipeline is in PRESTART state')
        yield _runTemplate(state.prerunQueue,
                           'pipeline.PRESTART_TEMPLATE_XML')
        yield _progress('Completed prestart')

        batchState['pipeline_state'] = STARTING_STATE
        state.updateBatchState()

    # STARTING
    if batchState['pipeline_state'] == STARTING_STATE:
        _log(batchState, 'Pipeline is in STARTING state')
        startTask = yield clusters_client.startCluster(
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {
                'cluster.master_type':
                batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
                'cluster.master_bid_price':
                batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
                'cluster.exec_type':
                batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
                'cluster.exec_bid_price':
                batchState['pipeline_config']['cluster.EXEC_BID_PRICE'],
            })

        outcome = yield tasks.blockOnTask('localhost', 'local', startTask)

        if outcome != tasks.task.TASK_COMPLETED:
            raise TaskError(startTask)

        yield _progress('Completed start')

        batchState['pipeline_state'] = PRERUN_STATE
        state.updateBatchState()

    # PRERUN
    if batchState['pipeline_state'] == PRERUN_STATE:
        _log(batchState, 'Pipeline is in PRERUN state')
        yield _runTemplate(state.prerunQueue, 'pipeline.PRERUN_TEMPLATE_XML')
        yield _progress('Completed prerun')
        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    # RUN_PIPELINE
    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        submitted = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = submitted['task_name']

        yield _progress('Completed run pipeline')
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    # RUNNING_PIPELINE
    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The monitor's result is not yielded; only _waitForPipeline is
        # waited on.
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _progress('Completed running pipeline')
        batchState['pipeline_state'] = POSTRUN_STATE
        state.updateBatchState()

    # POSTRUN
    if batchState['pipeline_state'] == POSTRUN_STATE:
        _log(batchState, 'Pipeline is in POSTRUN state')
        yield _runTemplate(state.postrunQueue,
                           'pipeline.POSTRUN_TEMPLATE_XML')
        yield _progress('Completed postrun')

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()

    yield _updateTask(batchState,
                      lambda task: task.setState(tasks.task.TASK_COMPLETED))
    _log(batchState, 'Pipeline finished successfully')
Beispiel #4
0
def _run(state, batchState):
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList(
            'localhost',
            'local',
            'guest',
            batchState['pipeline_name'],
            detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState,
             'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(
            state.innerPipelineConfig(), batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(
            batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState[
            'pipeline_name']
        batchState['pipeline_config'][
            'pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(
            batchState,
            'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState,
         'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs',
                                      global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster(
                'localhost',
                batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster(
                    'localhost',
                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                    'guest')
                yield _blockOnTask(terminateTask)
        except:
            pass

        batchState['cluster_task'] = yield startCluster(
            batchState, 'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']), 0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'], {
                'cluster.MASTER_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
                'cluster.MASTER_BID_PRICE':
                batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
                'cluster.EXEC_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
                'cluster.EXEC_BID_PRICE':
                batchState['pipeline_config']['cluster.EXEC_BID_PRICE']
            })

        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed startcluster').progress())

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState,
                 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed remote_local_transfer').
            progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        tag = yield tags_client.loadTag(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest', _decryptTagName(batchState))

        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(
            cluster['master']['public_dns'],
            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
            '/mnt/',
            options=conf('rsync.options'),
            user=conf('rsync.user'),
            log=True)

        for f in tag['files']:
            decryptCmd = ' '.join([
                os.path.join(
                    '/mnt',
                    os.path.basename(batchState['pipeline_config']
                                     ['params.DECRYPT_SCRIPT'])), f,
                '-out-dir', '/mnt/lgt_decrypt', '-remove-encrypted',
                '-password',
                batchState['pipeline_config']['params.DECRYPT_PASSWORD']
            ])

            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        tag = yield tags_client.tagData(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            action='overwrite',
            tagName=_decryptTagName(batchState),
            files=['/mnt/lgt_decrypt'],
            metadata={},
            recursive=True,
            expand=False,
            compressDir=None)

        _log(
            batchState, 'Waiting for tagging of %s to complete - %s' %
            (_decryptTagName(batchState), tag['task_name']))

        yield _blockOnTask(
            tag['task_name'],
            cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed decrypt').progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState, [
                'vp-transfer-dataset', '-t', '--tag-name=' + tag,
                '--dst-cluster=' +
                batchState['pipeline_config']['cluster.CLUSTER_NAME']
            ],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed reference_transfer').progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(
            batchState)
        pipeline = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed run pipeline').progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed running pipeline').progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer, batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed harvest').progress())

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']
                                   ['cluster.CLUSTER_NAME'])
            except Exception, err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed shutdown').progress())

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()