Example #1
def _harvestTransfer(batchState):
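    """Pull the pipeline's SAM-file tag off its cluster with vp-transfer-dataset,
    rsync every tagged file to the configured remote output directory, and
    finally remove the dataset with vp-delete-dataset."""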
    tagName = '%s_sam_files' % batchState['pipeline_name']
    outputName = 'lgt_batch_%03d_sam_files' % batchState['batch_num']
    
    yield _getOutput(batchState,
                     ['vp-transfer-dataset',
                      '--tag-name=' + tagName,
                      '--src-cluster=' + batchState['pipeline_config']['cluster.CLUSTER_NAME']],
                     log=True)


    tag = yield tags_client.loadTag('localhost',
                                    'local',
                                    'guest',
                                    tagName)
    
    dst = (batchState['pipeline_config']['params.REMOTE_USER'] +
           '@' +
           batchState['pipeline_config']['params.REMOTE_HOST'] +
           ':' +
           batchState['pipeline_config']['params.DATA_OUTPUT_DIRECTORY'] + '/' + outputName + '/')

    try:
        for f in tag['files']:
            rsyncCmd = RSYNC + [f, dst]
            yield _getOutput(batchState,
                             rsyncCmd,
                             log=True)
    finally:
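        # Clean up the transferred dataset whether or not every rsync
        # succeeded; note the delete's deferred is not yielded, so it is
        # not waited on here.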
        _getOutput(batchState,
                   ['vp-delete-dataset',
                    '--tag-name=' + tagName,
                    '--delete'],
                   log=True)
Example #2
def _harvestTransfer(batchState):
    tagName = '%s_sam_files' % batchState['pipeline_name']
    outputName = 'lgt_batch_%03d_sam_files' % batchState['batch_num']

    yield _getOutput(batchState, [
        'vp-transfer-dataset', '--tag-name=' + tagName, '--src-cluster=' +
        batchState['pipeline_config']['cluster.CLUSTER_NAME']
    ],
                     log=True)

    tag = yield tags_client.loadTag('localhost', 'local', 'guest', tagName)

    dst = (batchState['pipeline_config']['params.REMOTE_USER'] + '@' +
           batchState['pipeline_config']['params.REMOTE_HOST'] + ':' +
           batchState['pipeline_config']['params.DATA_OUTPUT_DIRECTORY'] +
           '/' + outputName + '/')

    try:
        for f in tag['files']:
            rsyncCmd = RSYNC + [f, dst]
            yield _getOutput(batchState, rsyncCmd, log=True)
    finally:
        _getOutput(batchState,
                   ['vp-delete-dataset', '--tag-name=' + tagName, '--delete'],
                   log=True)
Example #3
def _downloadTag(request):
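    """Rsync a tag's files from the source cluster's master into the
    destination cluster's upload directory and return a persist.Tag
    describing the downloaded copy, with tag_base_dir pointed at the new
    location."""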
    remoteTag = yield www_tags.loadTag('localhost',
                                       request.body['src_cluster'],
                                       request.body['user_name'],
                                       request.body['tag_name'])

    srcClusters = yield www_clusters.listClusters(
        'localhost', {'cluster_name': request.body['src_cluster']},
        request.body['user_name'])

    srcCluster = srcClusters[0]

    dstClusters = yield www_clusters.listClusters(
        'localhost', {'cluster_name': request.body['dst_cluster']},
        request.body['user_name'])

    dstCluster = dstClusters[0]

    dstTagPath = os.path.join(dstCluster['config']['dirs.upload_dir'],
                              remoteTag['tag_name'])

    baseDirFiles, nonBaseDirFiles = _partitionFiles(
        remoteTag['files'], remoteTag['metadata']['tag_base_dir'])

    if baseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              remoteTag['metadata']['tag_base_dir'],
                              dstTagPath,
                              baseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    if nonBaseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              '/',
                              dstTagPath,
                              nonBaseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    remoteFiles = ([os.path.join(dstTagPath, f) for f in baseDirFiles] + [
        os.path.join(dstTagPath, _makePathRelative(f)) for f in nonBaseDirFiles
    ])

    defer.returnValue(
        persist.Tag(tagName=remoteTag['tag_name'],
                    files=remoteFiles,
                    metadata=func.updateDict(remoteTag['metadata'],
                                             {'tag_base_dir': dstTagPath}),
                    phantom=remoteTag['phantom'],
                    taskName=None))
Example #4
def _downloadTag(request):
    remoteTag = yield www_tags.loadTag('localhost',
                                       request.body['src_cluster'],
                                       request.body['user_name'],
                                       request.body['tag_name'])

    srcClusters = yield www_clusters.listClusters('localhost',
                                                  {'cluster_name': request.body['src_cluster']},
                                                  request.body['user_name'])

    srcCluster = srcClusters[0]
    
    dstClusters = yield www_clusters.listClusters('localhost',
                                                  {'cluster_name': request.body['dst_cluster']},
                                                  request.body['user_name'])

    dstCluster = dstClusters[0]

    dstTagPath = os.path.join(dstCluster['config']['dirs.upload_dir'], remoteTag['tag_name'])

    baseDirFiles, nonBaseDirFiles = _partitionFiles(remoteTag['files'], remoteTag['metadata']['tag_base_dir'])


    if baseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              remoteTag['metadata']['tag_base_dir'],
                              dstTagPath,
                              baseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    if nonBaseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              '/',
                              dstTagPath,
                              nonBaseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    remoteFiles = ([os.path.join(dstTagPath, f) for f in baseDirFiles] +
                   [os.path.join(dstTagPath, _makePathRelative(f)) for f in nonBaseDirFiles])
    
    defer.returnValue(persist.Tag(tagName=remoteTag['tag_name'],
                                  files=remoteFiles,
                                  metadata=func.updateDict(remoteTag['metadata'],
                                                           {'tag_base_dir': dstTagPath}),
                                  phantom=remoteTag['phantom'],
                                  taskName=None))
Example #5
def _realizeUrls(request):
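    """Realize a tag's URLs on the destination cluster by running them
    through a fake phantom tag (reliableDownloader.py), append the tag's
    files on the destination, and return the realized tag."""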
    localTag = yield request.state.tagPersist.loadTag(request.body['tag_name'])
    
    # If we have urls we create a fake phantom tag
    fakePhantom = {'cluster.ALL.command':
                   'reliableDownloader.py -m 300 -t 20 -b ${base_dir} ' + ' '.join(localTag.metadata['urls'])}
    taskName = yield www_tags.realizePhantom('localhost',
                                             request.body['dst_cluster'],
                                             request.body['user_name'],
                                             localTag.tagName,
                                             fakePhantom,
                                             func.updateDict(localTag.metadata, {'urls_realized': True}))
    localTask = yield tasks_tx.loadTask(request.body['task_name'])
    endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                         request.body['dst_cluster'],
                                                         taskName,
                                                         localTask)

    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(request.body['tag_name'])


    if request.body['dst_cluster'] == 'local':
        yield tag_mq_data.tagData(request.state,
                                  request.body['tag_name'],
                                  request.body['task_name'],
                                  files=localTag.files,
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=None)
    else:
        localTask = yield www_tags.tagData('localhost',
                                           request.body['dst_cluster'],
                                           request.body['user_name'],
                                           action=tag_mq_data.ACTION_APPEND,
                                           tagName=localTag.tagName,
                                           files=localTag.files,
                                           metadata={},
                                           recursive=False,
                                           expand=False,
                                           compressDir=None)

    localTask = yield tasks_tx.loadTask(request.body['task_name'])
    endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                         request.body['dst_cluster'],
                                                         taskName,
                                                         localTask)

    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(request.body['tag_name'])
    
    # Load the tag up and return it so we can have the files it created
    tag = yield www_tags.loadTag('localhost',
                                 request.body['dst_cluster'],
                                 request.body['user_name'],
                                 request.body['tag_name'])

    defer.returnValue(tag)
Example #6
def _handleTransferTag(request):
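    """Dispatch a transfer-tag request: upload or download and re-tag for
    cross-cluster transfers, realize URLs or phantom tags when required,
    and fall back to a simple local re-tag otherwise."""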
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t : t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost',
                                    request.body['src_cluster'],
                                    request.body['user_name'],
                                    request.body['tag_name'])

    if not srcTag['phantom'] and (request.body['src_cluster'] != 'local' or request.body['dst_cluster'] != 'local'):
        if request.body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif request.body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError('Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress())

        if request.body.get('compress', False) or request.body.get('compress_dir', False):
            defaultDir = '/mnt/output' if request.body['dst_cluster'] == 'local' else tag.metadata['tag_base_dir']
            compressDir = request.body.get('compress_dir') if request.body.get('compress_dir', False) else defaultDir 
        else:
            compressDir = None
        
        if request.body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state,
                                      request.body['tag_name'],
                                      request.body['task_name'],
                                      files=tag.files,
                                      action=tag_mq_data.ACTION_OVERWRITE,
                                      metadata=tag.metadata,
                                      recursive=False,
                                      expand=False,
                                      compressDir=compressDir)
        else:
            newTag = yield www_tags.tagData('localhost',
                                            request.body['dst_cluster'],
                                            request.body['user_name'],
                                            action=tag_mq_data.ACTION_OVERWRITE,
                                            tagName=tag.tagName,
                                            files=tag.files,
                                            metadata=tag.metadata,
                                            recursive=False,
                                            expand=False,
                                            compressDir=compressDir)

            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                                 request.body['dst_cluster'],
                                                                 newTag['task_name'],
                                                                 localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(request.body['task_name'],
                                          lambda t : t.setState(tasks_tx.task.TASK_FAILED))
                raise TransferTagError(request.body['tag_name'])
    
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress())
    elif not srcTag['phantom'] and srcTag['metadata'].get('urls', []) and not srcTag['metadata'].get('urls_realized', False):
        # It's a local-to-local transfer, but we have URLs that haven't been realized yet
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress(2))
    elif srcTag['phantom']:
        # Upload the depends file
        srcClusters = yield www_clusters.listClusters('localhost',
                                                      {'cluster_name': request.body['src_cluster']},
                                                      request.body['user_name'])

        srcCluster = srcClusters[0]
        
        dstClusters = yield www_clusters.listClusters('localhost',
                                                      {'cluster_name': request.body['dst_cluster']},
                                                      request.body['user_name'])

        dstCluster = dstClusters[0]
        
        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/',
                            '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)
        
        
        taskName = yield www_tags.realizePhantom('localhost',
                                                 request.body['dst_cluster'],
                                                 request.body['user_name'],
                                                 srcTag['tag_name'],
                                                 srcTag['phantom'],
                                                 srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(request.body['task_name'])
        endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                             request.body['dst_cluster'],
                                                             taskName,
                                                             localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(request.body['task_name'],
                                      lambda t : t.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(request.body['tag_name'])
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.update(numTasks=1).progress())
    else:
        yield tag_mq_data.tagData(request.state,
                                  request.body['tag_name'],
                                  request.body['task_name'],
                                  files=[],
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir='/mnt/output' if request.body.get('compress', False) else None)
        
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress(2))
        
    defer.returnValue(request)
Example #7
def _run(state, batchState):
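    """Drive an lgt_wrapper batch through its pipeline states, from cluster
    start-up through remote transfer, decryption, reference transfer, the
    inner pipeline run, harvest, and shutdown, persisting batchState after
    each stage."""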
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList('localhost',
                                                        'local',
                                                        'guest',
                                                        batchState['pipeline_name'],
                                                        detail=True)
    else:
        pipelines = []
        
    if not pipelines:
        _log(batchState, 'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(state.innerPipelineConfig(),
                                                            batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState['pipeline_name']
        batchState['pipeline_config']['pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])
        
        pipeline = yield pipelines_client.createPipeline(host='localhost',
                                                         clusterName='local',
                                                         userName='******',
                                                         pipelineName=batchState['pipeline_name'],
                                                         protocol='clovr_wrapper',
                                                         queue='pipeline.q',
                                                         config=batchState['pipeline_config'],
                                                         parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(batchState, 'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))
        
        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState, 'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t : t.setState(tasks.task.TASK_RUNNING))
    
    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs', global_state.make_ref() + '.conf')
    
    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster('localhost',
                                                                       batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                                       'guest')
                yield _blockOnTask(terminateTask)
        except:
            pass

        batchState['cluster_task'] = yield startCluster(
            batchState,
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {'cluster.MASTER_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
             'cluster.MASTER_BID_PRICE':
                 batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
             'cluster.EXEC_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
             'cluster.EXEC_BID_PRICE':
                 batchState['pipeline_config']['cluster.EXEC_BID_PRICE']})

        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))
        
        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed startcluster'
                                                  ).progress())
        
        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure the cluster exists in some form')
        cluster = yield clusters_client.loadCluster('localhost',
                                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                    'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState, 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')
        
        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed remote_local_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster('localhost',
                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                    'guest')

        tag = yield tags_client.loadTag('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest',
                                        _decryptTagName(batchState))

        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(cluster['master']['public_dns'],
                            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
                            '/mnt/',
                            options=conf('rsync.options'),
                            user=conf('rsync.user'),
                            log=True)
        
        for f in tag['files']:
            decryptCmd = ' '.join([os.path.join('/mnt', os.path.basename(batchState['pipeline_config']['params.DECRYPT_SCRIPT'])),
                                   f,
                                   '-out-dir', '/mnt/lgt_decrypt',
                                   '-remove-encrypted',
                                   '-password', batchState['pipeline_config']['params.DECRYPT_PASSWORD']])

            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        tag = yield tags_client.tagData(host='localhost',
                                        clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        userName='******',
                                        action='overwrite',
                                        tagName=_decryptTagName(batchState),
                                        files=['/mnt/lgt_decrypt'],
                                        metadata={},
                                        recursive=True,
                                        expand=False,
                                        compressDir=None)

        _log(batchState, 'Waiting for tagging of %s to complete - %s' % (_decryptTagName(batchState),
                                                                         tag['task_name']))

        yield _blockOnTask(tag['task_name'],
                           cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])
        
        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed decrypt'
                                                  ).progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()
        

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')
        
        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState,
                                      ['vp-transfer-dataset',
                                       '-t',
                                       '--tag-name=' + tag,
                                       '--dst-cluster=' + batchState['pipeline_config']['cluster.CLUSTER_NAME']],
                                      log=True)
            
            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed reference_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()


    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(batchState)
        pipeline = yield pipelines_client.runPipeline(host='localhost',
                                                      clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                      userName='******',
                                                      parentPipeline=batchState['pipeline_name'],
                                                      bareRun=True,
                                                      queue=state.innerPipelineQueue(),
                                                      config=batchState['pipeline_config'],
                                                      overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed run pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()
        
    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed running pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed harvest'
                                                  ).progress())
        
        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])
            except Exception, err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster('localhost',
                                               batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                               'guest')
        

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed shutdown'
                                                  ).progress())
        
        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()
Example #8
def _realizeUrls(request):
    localTag = yield request.state.tagPersist.loadTag(request.body['tag_name'])

    # If we have urls we create a fake phantom tag
    fakePhantom = {
        'cluster.ALL.command':
        'reliableDownloader.py -m 300 -t 20 -b ${base_dir} ' +
        ' '.join(localTag.metadata['urls'])
    }
    taskName = yield www_tags.realizePhantom(
        'localhost', request.body['dst_cluster'], request.body['user_name'],
        localTag.tagName, fakePhantom,
        func.updateDict(localTag.metadata, {'urls_realized': True}))
    localTask = yield tasks_tx.loadTask(request.body['task_name'])
    endState, tsk = yield tasks_tx.blockOnTaskAndForward(
        'localhost', request.body['dst_cluster'], taskName, localTask)

    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(
            request.body['task_name'],
            lambda t: t.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(request.body['tag_name'])

    if request.body['dst_cluster'] == 'local':
        yield tag_mq_data.tagData(request.state,
                                  request.body['tag_name'],
                                  request.body['task_name'],
                                  files=localTag.files,
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=None)
    else:
        localTask = yield www_tags.tagData('localhost',
                                           request.body['dst_cluster'],
                                           request.body['user_name'],
                                           action=tag_mq_data.ACTION_APPEND,
                                           tagName=localTag.tagName,
                                           files=localTag.files,
                                           metadata={},
                                           recursive=False,
                                           expand=False,
                                           compressDir=None)

    localTask = yield tasks_tx.loadTask(request.body['task_name'])
    endState, tsk = yield tasks_tx.blockOnTaskAndForward(
        'localhost', request.body['dst_cluster'], taskName, localTask)

    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(
            request.body['task_name'],
            lambda t: t.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(request.body['tag_name'])

    # Load the tag up and return it so we can have the files it created
    tag = yield www_tags.loadTag('localhost', request.body['dst_cluster'],
                                 request.body['user_name'],
                                 request.body['tag_name'])

    defer.returnValue(tag)
Example #9
def _handleTransferTag(request):
    yield tasks_tx.updateTask(
        request.body['task_name'],
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost', request.body['src_cluster'],
                                    request.body['user_name'],
                                    request.body['tag_name'])

    if not srcTag['phantom'] and (request.body['src_cluster'] != 'local'
                                  or request.body['dst_cluster'] != 'local'):
        if request.body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif request.body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError(
                'Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress())

        if request.body.get('compress', False) or request.body.get(
                'compress_dir', False):
            defaultDir = '/mnt/output' if request.body[
                'dst_cluster'] == 'local' else tag.metadata['tag_base_dir']
            compressDir = request.body.get('compress_dir') if request.body.get(
                'compress_dir', False) else defaultDir
        else:
            compressDir = None

        if request.body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state,
                                      request.body['tag_name'],
                                      request.body['task_name'],
                                      files=tag.files,
                                      action=tag_mq_data.ACTION_OVERWRITE,
                                      metadata=tag.metadata,
                                      recursive=False,
                                      expand=False,
                                      compressDir=compressDir)
        else:
            newTag = yield www_tags.tagData(
                'localhost',
                request.body['dst_cluster'],
                request.body['user_name'],
                action=tag_mq_data.ACTION_OVERWRITE,
                tagName=tag.tagName,
                files=tag.files,
                metadata=tag.metadata,
                recursive=False,
                expand=False,
                compressDir=compressDir)

            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            endState, tsk = yield tasks_tx.blockOnTaskAndForward(
                'localhost', request.body['dst_cluster'], newTag['task_name'],
                localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(
                    request.body['task_name'],
                    lambda t: t.setState(tasks_tx.task.TASK_FAILED))
                raise TransferTagError(request.body['tag_name'])

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress())
    elif not srcTag['phantom'] and srcTag['metadata'].get(
            'urls', []) and not srcTag['metadata'].get('urls_realized', False):
        # It's a local-to-local transfer, but we have URLs that haven't been realized yet
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress(2))
    elif srcTag['phantom']:
        # Upload the depends file
        srcClusters = yield www_clusters.listClusters(
            'localhost', {'cluster_name': request.body['src_cluster']},
            request.body['user_name'])

        srcCluster = srcClusters[0]

        dstClusters = yield www_clusters.listClusters(
            'localhost', {'cluster_name': request.body['dst_cluster']},
            request.body['user_name'])

        dstCluster = dstClusters[0]

        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/',
                            '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)

        taskName = yield www_tags.realizePhantom('localhost',
                                                 request.body['dst_cluster'],
                                                 request.body['user_name'],
                                                 srcTag['tag_name'],
                                                 srcTag['phantom'],
                                                 srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(request.body['task_name'])
        endState, tsk = yield tasks_tx.blockOnTaskAndForward(
            'localhost', request.body['dst_cluster'], taskName, localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(
                request.body['task_name'],
                lambda t: t.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(request.body['tag_name'])
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.update(numTasks=1).progress())
    else:
        yield tag_mq_data.tagData(
            request.state,
            request.body['tag_name'],
            request.body['task_name'],
            files=[],
            action=tag_mq_data.ACTION_APPEND,
            metadata={},
            recursive=False,
            expand=False,
            compressDir='/mnt/output'
            if request.body.get('compress', False) else None)

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress(2))

    defer.returnValue(request)
Example #10
def _run(state, batchState):
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList(
            'localhost',
            'local',
            'guest',
            batchState['pipeline_name'],
            detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState,
             'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(
            state.innerPipelineConfig(), batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(
            batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState[
            'pipeline_name']
        batchState['pipeline_config'][
            'pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        pipeline = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(
            batchState,
            'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState,
         'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs',
                                      global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive
        try:
            cluster = yield loadCluster(
                'localhost',
                batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster(
                    'localhost',
                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                    'guest')
                yield _blockOnTask(terminateTask)
        except:
            pass

        batchState['cluster_task'] = yield startCluster(
            batchState, 'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']), 0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'], {
                'cluster.MASTER_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
                'cluster.MASTER_BID_PRICE':
                batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
                'cluster.EXEC_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
                'cluster.EXEC_BID_PRICE':
                batchState['pipeline_config']['cluster.EXEC_BID_PRICE']
            })

        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed startcluster').progress())

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure the cluster exists in some form')
        cluster = yield clusters_client.loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState,
                 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed remote_local_transfer').
            progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        tag = yield tags_client.loadTag(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest', _decryptTagName(batchState))

        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        yield rsync.rsyncTo(
            cluster['master']['public_dns'],
            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
            '/mnt/',
            options=conf('rsync.options'),
            user=conf('rsync.user'),
            log=True)

        for f in tag['files']:
            decryptCmd = ' '.join([
                os.path.join(
                    '/mnt',
                    os.path.basename(batchState['pipeline_config']
                                     ['params.DECRYPT_SCRIPT'])), f,
                '-out-dir', '/mnt/lgt_decrypt', '-remove-encrypted',
                '-password',
                batchState['pipeline_config']['params.DECRYPT_PASSWORD']
            ])

            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        tag = yield tags_client.tagData(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            action='overwrite',
            tagName=_decryptTagName(batchState),
            files=['/mnt/lgt_decrypt'],
            metadata={},
            recursive=True,
            expand=False,
            compressDir=None)

        _log(
            batchState, 'Waiting for tagging of %s to complete - %s' %
            (_decryptTagName(batchState), tag['task_name']))

        yield _blockOnTask(
            tag['task_name'],
            cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed decrypt').progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState, [
                'vp-transfer-dataset', '-t', '--tag-name=' + tag,
                '--dst-cluster=' +
                batchState['pipeline_config']['cluster.CLUSTER_NAME']
            ],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed reference_transfer').progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(
            batchState)
        pipeline = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed run pipeline').progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed running pipeline').progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer, batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed harvest').progress())

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']
                                   ['cluster.CLUSTER_NAME'])
            except Exception, err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed shutdown').progress())

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()