Beispiel #1
0
def _handleTransferTag(request):
    """
    Transfer the tag named in ``request.body`` from ``src_cluster`` to
    ``dst_cluster`` and record progress on the request's task.

    This is a generator that yields deferreds and finishes through
    ``defer.returnValue``, so it is presumably driven by
    ``defer.inlineCallbacks`` (the decorator is not visible here).

    The source tag is loaded first and one of four paths is taken:

      * phantom tag: rsync the ``depends_on`` files to the destination
        master, ask the destination to realize the phantom and wait for
        that remote task
      * non-phantom tag with at least one non-'local' cluster: upload or
        download the files, then tag them on the destination
      * local-to-local tag with urls that are not realized yet: realize
        the urls
      * anything else: append an empty file list to the local tag

    Raises NoLocalClusterError when neither cluster is 'local',
    TransferTagError when the remote tagging task fails and
    RealizePhantomError when the remote realize task fails.

    The value handed to ``defer.returnValue`` is ``request`` itself.
    """
    body = request.body
    localTaskName = body['task_name']

    # Task updaters shared by the branches below
    failTask = lambda t: t.setState(tasks_tx.task.TASK_FAILED)
    oneStep = lambda t: t.progress()
    twoSteps = lambda t: t.progress(2)

    yield tasks_tx.updateTask(localTaskName,
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost',
                                    body['src_cluster'],
                                    body['user_name'],
                                    body['tag_name'])

    if srcTag['phantom']:
        srcMatches = yield www_clusters.listClusters('localhost',
                                                     {'cluster_name': body['src_cluster']},
                                                     body['user_name'])
        srcCluster = srcMatches[0]

        dstMatches = yield www_clusters.listClusters('localhost',
                                                     {'cluster_name': body['dst_cluster']},
                                                     body['user_name'])
        dstCluster = dstMatches[0]

        # Upload the files the phantom tag depends on
        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/',
                            '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)

        remoteTaskName = yield www_tags.realizePhantom('localhost',
                                                       body['dst_cluster'],
                                                       body['user_name'],
                                                       srcTag['tag_name'],
                                                       srcTag['phantom'],
                                                       srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(localTaskName)
        endState, _unused = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                                 body['dst_cluster'],
                                                                 remoteTaskName,
                                                                 localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(localTaskName, failTask)
            raise RealizePhantomError(body['tag_name'])

        # Only one step on this path, so shrink the task count as well
        yield tasks_tx.updateTask(localTaskName,
                                  lambda t: t.update(numTasks=1).progress())

    elif body['src_cluster'] != 'local' or body['dst_cluster'] != 'local':
        # Step one: move the files
        if body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError('Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(localTaskName, oneStep)

        # An explicit compress_dir wins, otherwise fall back to a
        # directory that depends on where the tag ends up
        compressDir = None
        if body.get('compress', False) or body.get('compress_dir', False):
            if body['dst_cluster'] == 'local':
                fallbackDir = '/mnt/output'
            else:
                fallbackDir = tag.metadata['tag_base_dir']
            compressDir = body.get('compress_dir') or fallbackDir

        tagOptions = dict(files=tag.files,
                          action=tag_mq_data.ACTION_OVERWRITE,
                          metadata=tag.metadata,
                          recursive=False,
                          expand=False,
                          compressDir=compressDir)

        # Step two: tag the transferred files on the destination
        if body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state,
                                      body['tag_name'],
                                      localTaskName,
                                      **tagOptions)
        else:
            newTag = yield www_tags.tagData('localhost',
                                            body['dst_cluster'],
                                            body['user_name'],
                                            tagName=tag.tagName,
                                            **tagOptions)

            localTask = yield tasks_tx.loadTask(localTaskName)
            endState, _unused = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                                     body['dst_cluster'],
                                                                     newTag['task_name'],
                                                                     localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(localTaskName, failTask)
                raise TransferTagError(body['tag_name'])

        yield tasks_tx.updateTask(localTaskName, oneStep)

    elif srcTag['metadata'].get('urls', []) and not srcTag['metadata'].get('urls_realized', False):
        # Local to local, but the tag has urls that were never realized
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(localTaskName, twoSteps)

    else:
        if body.get('compress', False):
            compressDir = '/mnt/output'
        else:
            compressDir = None

        yield tag_mq_data.tagData(request.state,
                                  body['tag_name'],
                                  localTaskName,
                                  files=[],
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=compressDir)

        yield tasks_tx.updateTask(localTaskName, twoSteps)

    defer.returnValue(request)
Beispiel #2
0
def _uploadTag(request):
    """
    Copy the files of a locally persisted tag to the destination cluster's
    master and describe the result as a new ``persist.Tag``.

    This is a generator that yields deferreds and finishes through
    ``defer.returnValue``, so it is presumably driven by
    ``defer.inlineCallbacks`` (the decorator is not visible here).

    Reads ``tag_name``, ``src_cluster``, ``dst_cluster``, ``user_name`` and
    ``dst_type`` from ``request.body``.

    The value handed to ``defer.returnValue`` is a ``persist.Tag`` whose
    files are the remote paths, whose ``tag_base_dir`` metadata is the
    remote tag directory and whose ``taskName`` is None.
    """
    body = request.body
    localTag = yield request.state.tagPersist.loadTag(body['tag_name'])

    srcMatches = yield www_clusters.listClusters('localhost',
                                                 {'cluster_name': body['src_cluster']},
                                                 body['user_name'])
    srcCluster = srcMatches[0]

    dstMatches = yield www_clusters.listClusters('localhost',
                                                 {'cluster_name': body['dst_cluster']},
                                                 body['user_name'])
    dstCluster = dstMatches[0]

    dstConfig = dstCluster['config']

    # Keep the trailing '/' so the path is recognisable as a directory
    dstTagPath = os.path.join(dstConfig['dirs.upload_dir'], localTag.tagName) + '/'

    # A 'local' destination type uploads into the nostaging directory
    # instead, which is created on the cluster first
    if request.body['dst_type'] == 'local':
        if 'dirs.nostaging_upload_dir' in dstConfig:
            uploadDir = dstConfig['dirs.nostaging_upload_dir']
        else:
            uploadDir = '/mnt/nostaging/'
        yield _makeDirsOnCluster(dstCluster, [uploadDir])
        dstTagPath = os.path.join(uploadDir, localTag.tagName) + '/'

    tagBaseDir = localTag.metadata['tag_base_dir']
    baseDirFiles, nonBaseDirFiles = _partitionFiles(localTag.files, tagBaseDir)

    # Files under the tag base dir are synced relative to it, everything
    # else relative to '/'.  Empty groups are skipped.
    for srcRoot, group in ((tagBaseDir, baseDirFiles), ('/', nonBaseDirFiles)):
        if group:
            yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                                srcRoot,
                                dstTagPath,
                                group,
                                srcCluster['config']['rsync.options'],
                                srcCluster['config']['rsync.user'],
                                log=True)

    remoteFiles = [os.path.join(dstTagPath, f) for f in baseDirFiles]
    remoteFiles.extend([os.path.join(dstTagPath, _makePathRelative(f))
                        for f in nonBaseDirFiles])

    metadata = localTag.metadata
    hasPendingUrls = metadata.get('urls', []) and not metadata.get('urls_realized', False)
    if hasPendingUrls:
        realized = yield _realizeUrls(request)
        remoteFiles.extend(realized['files'])
        metadata = func.updateDict(metadata,
                                   {'urls_realized': True})

    # A master without an instance type may be a local VM's shared folder
    # (adhoc cluster), which does not support chown/chmod, so ownership is
    # only fixed up when an instance type is set
    if dstCluster['master']['instance_type'] is not None:
        yield ssh.runProcessSSH(dstCluster['master']['public_dns'],
                                'chown -R %s %s' % (dstCluster['config']['vappio.user'],
                                                    dstTagPath),
                                None,
                                log.err,
                                srcCluster['config']['ssh.user'],
                                srcCluster['config']['ssh.options'])

    remoteMetadata = func.updateDict(metadata,
                                     {'tag_base_dir': dstTagPath})
    defer.returnValue(persist.Tag(tagName=localTag.tagName,
                                  files=remoteFiles,
                                  metadata=remoteMetadata,
                                  phantom=localTag.phantom,
                                  taskName=None))
Beispiel #3
0
def _run(state, batchState):
    """
    Drive one batch through the wrapper pipeline, resuming from whatever
    stage is stored in ``batchState['pipeline_state']``.

    This is a generator that yields deferreds, so it is presumably driven
    by ``defer.inlineCallbacks`` (the decorator is not visible here).

    Stages run in this order; each one stores the next stage in
    ``batchState`` and calls ``state.updateBatchState()`` before falling
    through to the following ``if``:

        STARTCLUSTER -> REMOTE_LOCAL_TRANSFER -> DECRYPT ->
        REFERENCE_TRANSFER -> RUN_PIPELINE -> RUNNING_PIPELINE ->
        HARVEST -> SHUTDOWN -> COMPLETED

    state      -- object providing innerPipelineConfig(), parentPipeline(),
                  innerPipelineQueue(), prerunQueue and updateBatchState()
    batchState -- dict holding the batch's progress; it is mutated in place

    Errors from the individual stages propagate to the caller.  The only
    swallowed failures are the pre-start cluster check and waiting on a
    pending 'add_instances_task' during shutdown; both are logged.
    """
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList('localhost',
                                                        'local',
                                                        'guest',
                                                        batchState['pipeline_name'],
                                                        detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState, 'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(state.innerPipelineConfig(),
                                                            batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState['pipeline_name']
        batchState['pipeline_config']['pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        # Created as 'guest', the same user pipelineList looks it up with
        # above; with any other user the lookup can never find it again.
        pipeline = yield pipelines_client.createPipeline(host='localhost',
                                                         clusterName='local',
                                                         userName='guest',
                                                         pipelineName=batchState['pipeline_name'],
                                                         protocol='clovr_wrapper',
                                                         queue='pipeline.q',
                                                         config=batchState['pipeline_config'],
                                                         parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(batchState, 'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState, 'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t : t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs', global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive.  This check
        # is best effort: any failure is logged and startcluster goes
        # ahead.  Only Exception is caught so that GeneratorExit and
        # KeyboardInterrupt are not swallowed inside this generator.
        try:
            cluster = yield loadCluster('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster('localhost',
                                                                       batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                                       'guest')
                yield _blockOnTask(terminateTask)
        except Exception as err:
            _log(batchState, 'Ignoring error while checking for an existing cluster: ' + str(err))

        batchState['cluster_task'] = yield startCluster(
            batchState,
            'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']),
            0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'],
            {'cluster.MASTER_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
             'cluster.MASTER_BID_PRICE':
                 batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
             'cluster.EXEC_INSTANCE_TYPE':
                 batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
             'cluster.EXEC_BID_PRICE':
                 batchState['pipeline_config']['cluster.EXEC_BID_PRICE']})

        # Reset the counters before recording this stage's progress
        yield _updateTask(batchState,
                          lambda t : t.update(completedTasks=0,
                                              numTasks=9))

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed startcluster'
                                                  ).progress())

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster('localhost',
                                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                    'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState, 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed remote_local_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster('localhost',
                                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                    'guest')

        tag = yield tags_client.loadTag('localhost',
                                        batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        'guest',
                                        _decryptTagName(batchState))

        # NOTE(review): this file object is never closed explicitly.  It can
        # only be closed right after this call if configFromStream reads the
        # stream eagerly - confirm before changing.
        conf = config.configFromStream(open('/tmp/machine.conf'))

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        # Copy the decrypt script to /mnt/ on the master
        yield rsync.rsyncTo(cluster['master']['public_dns'],
                            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
                            '/mnt/',
                            options=conf('rsync.options'),
                            user=conf('rsync.user'),
                            log=True)

        # NOTE(review): the command is assembled by plain string joining, so
        # file names and the password are not shell-quoted, and the password
        # is passed on the command line with log=True - confirm this is
        # acceptable.
        for f in tag['files']:
            decryptCmd = ' '.join([os.path.join('/mnt', os.path.basename(batchState['pipeline_config']['params.DECRYPT_SCRIPT'])),
                                   f,
                                   '-out-dir', '/mnt/lgt_decrypt',
                                   '-remove-encrypted',
                                   '-password', batchState['pipeline_config']['params.DECRYPT_PASSWORD']])

            # Exit codes 0 and 253 are both accepted
            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        # Re-tag under the same name, now pointing at the decrypted files.
        # 'guest' matches the user the tag was loaded with above.
        tag = yield tags_client.tagData(host='localhost',
                                        clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                        userName='guest',
                                        action='overwrite',
                                        tagName=_decryptTagName(batchState),
                                        files=['/mnt/lgt_decrypt'],
                                        metadata={},
                                        recursive=True,
                                        expand=False,
                                        compressDir=None)

        _log(batchState, 'Waiting for tagging of %s to complete - %s' % (_decryptTagName(batchState),
                                                                         tag['task_name']))

        yield _blockOnTask(tag['task_name'],
                           cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed decrypt'
                                                  ).progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()


    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        # Start every transfer first; each command's stdout is used as the
        # task name to wait on afterwards
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState,
                                      ['vp-transfer-dataset',
                                       '-t',
                                       '--tag-name=' + tag,
                                       '--dst-cluster=' + batchState['pipeline_config']['cluster.CLUSTER_NAME']],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed reference_transfer'
                                                  ).progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()


    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(batchState)
        pipeline = yield pipelines_client.runPipeline(host='localhost',
                                                      clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                                      userName='guest',
                                                      parentPipeline=batchState['pipeline_name'],
                                                      bareRun=True,
                                                      queue=state.innerPipelineQueue(),
                                                      config=batchState['pipeline_config'],
                                                      overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed run pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # _monitorPipeline is not yielded; only _waitForPipeline is waited on
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed running pipeline'
                                                  ).progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer,
                                                batchState)

        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed harvest'
                                                  ).progress())

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        # Wait for a pending add-instances task before terminating; a
        # failure there is logged and must not prevent the shutdown
        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])
            except Exception as err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster('localhost',
                                               batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                                               'guest')


        yield _updateTask(batchState,
                          lambda t : t.addMessage(tasks.task.MSG_SILENT,
                                                  'Completed shutdown'
                                                  ).progress())

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()
Beispiel #4
0
def _handleTransferTag(request):
    """
    Transfer the tag named in ``request.body`` from ``src_cluster`` to
    ``dst_cluster`` and record progress on the request's task.

    This is a generator that yields deferreds and finishes through
    ``defer.returnValue``, so it is presumably driven by
    ``defer.inlineCallbacks`` (the decorator is not visible here).

    The source tag is loaded first and one of four paths is taken:

      * phantom tag: rsync the ``depends_on`` files to the destination
        master, ask the destination to realize the phantom and wait for
        that remote task
      * non-phantom tag with at least one non-'local' cluster: upload or
        download the files, then tag them on the destination
      * local-to-local tag with urls that are not realized yet: realize
        the urls
      * anything else: append an empty file list to the local tag

    Raises NoLocalClusterError when neither cluster is 'local',
    TransferTagError when the remote tagging task fails and
    RealizePhantomError when the remote realize task fails.

    The value handed to ``defer.returnValue`` is ``request`` itself.
    """
    body = request.body
    localTaskName = body['task_name']

    # Task updaters shared by the branches below
    failTask = lambda t: t.setState(tasks_tx.task.TASK_FAILED)
    oneStep = lambda t: t.progress()
    twoSteps = lambda t: t.progress(2)

    yield tasks_tx.updateTask(
        localTaskName,
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost', body['src_cluster'],
                                    body['user_name'], body['tag_name'])

    if srcTag['phantom']:
        srcMatches = yield www_clusters.listClusters(
            'localhost', {'cluster_name': body['src_cluster']},
            body['user_name'])
        srcCluster = srcMatches[0]

        dstMatches = yield www_clusters.listClusters(
            'localhost', {'cluster_name': body['dst_cluster']},
            body['user_name'])
        dstCluster = dstMatches[0]

        # Upload the files the phantom tag depends on
        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/',
                            '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)

        remoteTaskName = yield www_tags.realizePhantom(
            'localhost', body['dst_cluster'], body['user_name'],
            srcTag['tag_name'], srcTag['phantom'], srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(localTaskName)
        endState, _unused = yield tasks_tx.blockOnTaskAndForward(
            'localhost', body['dst_cluster'], remoteTaskName, localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(localTaskName, failTask)
            raise RealizePhantomError(body['tag_name'])

        # Only one step on this path, so shrink the task count as well
        yield tasks_tx.updateTask(localTaskName,
                                  lambda t: t.update(numTasks=1).progress())

    elif body['src_cluster'] != 'local' or body['dst_cluster'] != 'local':
        # Step one: move the files
        if body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError(
                'Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(localTaskName, oneStep)

        # An explicit compress_dir wins, otherwise fall back to a
        # directory that depends on where the tag ends up
        compressDir = None
        if body.get('compress', False) or body.get('compress_dir', False):
            if body['dst_cluster'] == 'local':
                fallbackDir = '/mnt/output'
            else:
                fallbackDir = tag.metadata['tag_base_dir']
            compressDir = body.get('compress_dir') or fallbackDir

        tagOptions = dict(files=tag.files,
                          action=tag_mq_data.ACTION_OVERWRITE,
                          metadata=tag.metadata,
                          recursive=False,
                          expand=False,
                          compressDir=compressDir)

        # Step two: tag the transferred files on the destination
        if body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state, body['tag_name'],
                                      localTaskName, **tagOptions)
        else:
            newTag = yield www_tags.tagData('localhost',
                                            body['dst_cluster'],
                                            body['user_name'],
                                            tagName=tag.tagName,
                                            **tagOptions)

            localTask = yield tasks_tx.loadTask(localTaskName)
            endState, _unused = yield tasks_tx.blockOnTaskAndForward(
                'localhost', body['dst_cluster'], newTag['task_name'],
                localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(localTaskName, failTask)
                raise TransferTagError(body['tag_name'])

        yield tasks_tx.updateTask(localTaskName, oneStep)

    elif srcTag['metadata'].get('urls', []) and not srcTag['metadata'].get(
            'urls_realized', False):
        # Local to local, but the tag has urls that were never realized
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(localTaskName, twoSteps)

    else:
        if body.get('compress', False):
            compressDir = '/mnt/output'
        else:
            compressDir = None

        yield tag_mq_data.tagData(request.state,
                                  body['tag_name'],
                                  localTaskName,
                                  files=[],
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=compressDir)

        yield tasks_tx.updateTask(localTaskName, twoSteps)

    defer.returnValue(request)
Beispiel #5
0
def _uploadTag(request):
    """Copy a locally persisted tag's files to the destination cluster.

    Reads ``tag_name``, ``src_cluster``, ``dst_cluster``, ``user_name`` and
    ``dst_type`` from ``request.body``.  Files are rsynced to the destination
    master and a new ``persist.Tag`` describing the remote copy is handed
    back through ``defer.returnValue``.

    NOTE(review): the body yields deferreds and ends with
    ``defer.returnValue``, so it presumably runs under
    ``defer.inlineCallbacks``; no decorator is visible here -- confirm.
    """
    body = request.body
    tagRecord = yield request.state.tagPersist.loadTag(body['tag_name'])

    # Resolve the source cluster first, then the destination; the first
    # match of each lookup is used.
    matches = yield www_clusters.listClusters(
        'localhost', {'cluster_name': body['src_cluster']}, body['user_name'])
    source = matches[0]

    matches = yield www_clusters.listClusters(
        'localhost', {'cluster_name': body['dst_cluster']}, body['user_name'])
    target = matches[0]

    # The trailing '/' marks the destination as a directory.
    remoteDir = os.path.join(target['config']['dirs.upload_dir'],
                             tagRecord.tagName) + '/'

    # For a 'local' destination type the upload goes to the nostaging
    # directory instead (configured value if present, else a fixed default),
    # which is created on the destination cluster before use.
    if body['dst_type'] == 'local':
        if 'dirs.nostaging_upload_dir' in target['config']:
            noStagingDir = target['config']['dirs.nostaging_upload_dir']
        else:
            noStagingDir = '/mnt/nostaging/'
        yield _makeDirsOnCluster(target, [noStagingDir])
        remoteDir = os.path.join(noStagingDir, tagRecord.tagName) + '/'

    insideBase, outsideBase = _partitionFiles(
        tagRecord.files, tagRecord.metadata['tag_base_dir'])

    # Files under the tag base dir are synced relative to it; everything
    # else is synced relative to the filesystem root.  Empty groups are
    # skipped entirely.
    syncPlan = ((tagRecord.metadata['tag_base_dir'], insideBase),
                ('/', outsideBase))
    for localRoot, batch in syncPlan:
        if batch:
            yield rsync.rsyncTo(target['master']['public_dns'],
                                localRoot,
                                remoteDir,
                                batch,
                                source['config']['rsync.options'],
                                source['config']['rsync.user'],
                                log=True)

    # Paths of the uploaded files as they appear on the destination.
    uploaded = [os.path.join(remoteDir, name) for name in insideBase]
    uploaded.extend(
        os.path.join(remoteDir, _makePathRelative(name))
        for name in outsideBase)

    tagMeta = tagRecord.metadata
    pendingUrls = (tagMeta.get('urls', [])
                   and not tagMeta.get('urls_realized', False))
    if pendingUrls:
        realized = yield _realizeUrls(request)
        uploaded.extend(realized['files'])
        tagMeta = func.updateDict(tagMeta, {'urls_realized': True})

    # Ownership is only fixed up when the destination master reports an
    # instance type.  NOTE(review): presumably a None instance type marks an
    # adhoc cluster (e.g. a local VM shared folder) where chown/chmod is not
    # supported -- confirm.
    if target['master']['instance_type'] is not None:
        masterHost = target['master']['public_dns']
        chownCmd = 'chown -R %s %s' % (target['config']['vappio.user'],
                                       remoteDir)
        yield ssh.runProcessSSH(masterHost, chownCmd, None, log.err,
                                source['config']['ssh.user'],
                                source['config']['ssh.options'])

    uploadedTag = persist.Tag(
        tagName=tagRecord.tagName,
        files=uploaded,
        metadata=func.updateDict(tagMeta, {'tag_base_dir': remoteDir}),
        phantom=tagRecord.phantom,
        taskName=None)
    defer.returnValue(uploadedTag)
Beispiel #6
0
def _run(state, batchState):
    """Drive one batch through the lgt_wrapper stages, resuming where it left off.

    The function is a chain of ``if`` blocks keyed on
    ``batchState['pipeline_state']``.  Each block performs its stage, reports
    progress on the task, advances ``pipeline_state`` to the next stage and
    calls ``state.updateBatchState()``, so a batch whose stored state is
    already past a stage skips that stage.  Stages, in order: STARTCLUSTER,
    REMOTE_LOCAL_TRANSFER, DECRYPT, REFERENCE_TRANSFER, RUN_PIPELINE,
    RUNNING_PIPELINE, HARVEST, SHUTDOWN.

    Parameters:
        state: object supplying ``innerPipelineConfig()``,
            ``parentPipeline()``, ``innerPipelineQueue()``, ``prerunQueue``
            and ``updateBatchState()``.
        batchState: mutable mapping holding this batch's bookkeeping
            (``pipeline_name``, ``pipeline_config``, ``pipeline_state``,
            ``state``, ``actions`` and the various task names); it is
            updated in place.

    Raises:
        Exception: if the cluster is found to be 'unresponsive' at the start
            of the REMOTE_LOCAL_TRANSFER stage.

    NOTE(review): the body yields deferreds, so it presumably runs under
    ``defer.inlineCallbacks``; no decorator is visible here -- confirm.
    """
    # Look up an existing wrapper pipeline only if a previous run stored a
    # name; no match means this is the first time the batch is run.
    if 'pipeline_name' in batchState:
        pipelines = yield pipelines_client.pipelineList(
            'localhost',
            'local',
            'guest',
            batchState['pipeline_name'],
            detail=True)
    else:
        pipelines = []

    if not pipelines:
        _log(batchState,
             'First time running, creating pipeline state information')
        batchState['pipeline_config'] = yield _applyActions(
            state.innerPipelineConfig(), batchState['actions'])
        batchState['pipeline_state'] = STARTCLUSTER_STATE

        # We need to create a fake, local, pipeline for metrics to work
        batchState['pipeline_name'] = pipeline_misc.checksumInput(
            batchState['pipeline_config'])
        batchState['pipeline_config']['pipeline.PIPELINE_NAME'] = batchState[
            'pipeline_name']
        batchState['pipeline_config'][
            'pipeline.PIPELINE_WRAPPER_NAME'] = batchState['pipeline_name']

        _log(batchState, 'Pipeline named ' + batchState['pipeline_name'])

        # NOTE(review): '******' looks like a redacted value -- the
        # positional calls in this function pass 'guest'; confirm the
        # intended user name.
        pipeline = yield pipelines_client.createPipeline(
            host='localhost',
            clusterName='local',
            userName='******',
            pipelineName=batchState['pipeline_name'],
            protocol='clovr_wrapper',
            queue='pipeline.q',
            config=batchState['pipeline_config'],
            parentPipeline=state.parentPipeline())

        batchState['lgt_wrapper_task_name'] = pipeline['task_name']

        _log(
            batchState,
            'Setting number of tasks to 9 (number in a standard lgt_wrapper)')
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        state.updateBatchState()

    batchState['state'] = RUNNING_STATE

    _log(batchState,
         'Pipeline started in %s state' % batchState['pipeline_state'])

    yield _updateTask(batchState,
                      lambda t: t.setState(tasks.task.TASK_RUNNING))

    pipelineConfigFile = os.path.join(TMP_DIR, 'pipeline_configs',
                                      global_state.make_ref() + '.conf')

    _log(batchState, 'Creating ergatis configuration')
    _writeErgatisConfig(batchState['pipeline_config'], pipelineConfigFile)

    if batchState['pipeline_state'] == STARTCLUSTER_STATE:
        _log(batchState, 'Pipeline is in STARTCLUSTER state')

        # First see if the cluster exists but is unresponsive.  This probe
        # is best-effort: any failure is logged and start-up proceeds.
        # Only Exception is caught so that GeneratorExit/KeyboardInterrupt
        # are not swallowed inside this generator.
        # NOTE(review): this stage calls bare loadCluster while
        # REMOTE_LOCAL_TRANSFER uses clusters_client.loadCluster -- confirm
        # both names are in scope.
        try:
            cluster = yield loadCluster(
                'localhost',
                batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest')
            if cluster['state'] == 'unresponsive':
                _log(batchState, 'Pipeline is unresponsive, terminating')
                terminateTask = yield clusters_client.terminateCluster(
                    'localhost',
                    batchState['pipeline_config']['cluster.CLUSTER_NAME'],
                    'guest')
                yield _blockOnTask(terminateTask)
        except Exception as err:
            _log(batchState,
                 'Ignoring error while checking for an unresponsive '
                 'cluster: ' + str(err))

        batchState['cluster_task'] = yield startCluster(
            batchState, 'localhost',
            batchState['pipeline_config']['cluster.CLUSTER_NAME'], 'guest',
            int(batchState['pipeline_config']['cluster.EXEC_NODES']), 0,
            batchState['pipeline_config']['cluster.CLUSTER_CREDENTIAL'], {
                'cluster.MASTER_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.MASTER_INSTANCE_TYPE'],
                'cluster.MASTER_BID_PRICE':
                batchState['pipeline_config']['cluster.MASTER_BID_PRICE'],
                'cluster.EXEC_INSTANCE_TYPE':
                batchState['pipeline_config']['cluster.EXEC_INSTANCE_TYPE'],
                'cluster.EXEC_BID_PRICE':
                batchState['pipeline_config']['cluster.EXEC_BID_PRICE']
            })

        # Reset the task counters before recording the first completed step.
        yield _updateTask(batchState,
                          lambda t: t.update(completedTasks=0, numTasks=9))

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed startcluster').progress())

        batchState['pipeline_state'] = REMOTE_LOCAL_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REMOTE_LOCAL_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REMOTE_LOCAL_TRANSFER')

        _log(batchState, 'Making sure cluster is exists in some form')
        cluster = yield clusters_client.loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        if cluster['state'] == 'unresponsive':
            _log(batchState,
                 'Pipeline is unresponsive, erroring and restarting')
            raise Exception('Cluster is not responsive')

        # The transfer is submitted through the shared prerun queue rather
        # than being called directly.
        yield state.prerunQueue.addWithDeferred(_remoteLocalTransfer,
                                                batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed remote_local_transfer').
            progress())

        batchState['pipeline_state'] = DECRYPT_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == DECRYPT_STATE:
        _log(batchState, 'Pipeline is in DECRYPT')

        cluster = yield loadCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        tag = yield tags_client.loadTag(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest', _decryptTagName(batchState))

        # Close the config file once parsed instead of leaking the handle.
        # NOTE(review): assumes configFromStream consumes the stream before
        # returning -- confirm.
        with open('/tmp/machine.conf') as confFile:
            conf = config.configFromStream(confFile)

        yield ssh.runProcessSSH(cluster['master']['public_dns'],
                                'mkdir -p /mnt/lgt_decrypt',
                                stdoutf=None,
                                stderrf=None,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                log=True)

        # Ship the decrypt script to /mnt/ on the cluster master.
        yield rsync.rsyncTo(
            cluster['master']['public_dns'],
            batchState['pipeline_config']['params.DECRYPT_SCRIPT'],
            '/mnt/',
            options=conf('rsync.options'),
            user=conf('rsync.user'),
            log=True)

        # Run the script once per tagged file, one file at a time.
        # NOTE(review): the file name and the password are joined into the
        # command line unquoted; values containing spaces or shell
        # metacharacters would alter the command -- consider quoting.
        for f in tag['files']:
            decryptCmd = ' '.join([
                os.path.join(
                    '/mnt',
                    os.path.basename(batchState['pipeline_config']
                                     ['params.DECRYPT_SCRIPT'])), f,
                '-out-dir', '/mnt/lgt_decrypt', '-remove-encrypted',
                '-password',
                batchState['pipeline_config']['params.DECRYPT_PASSWORD']
            ])

            # Exit codes 0 and 253 are both accepted.
            yield ssh.getOutput(cluster['master']['public_dns'],
                                decryptCmd,
                                sshUser=conf('ssh.user'),
                                sshFlags=conf('ssh.options'),
                                expected=[0, 253],
                                log=True)

        # Re-tag the decrypted output directory under the same tag name.
        # NOTE(review): '******' looks like a redacted value -- loadTag
        # above uses 'guest' for the same tag; confirm the intended user.
        tag = yield tags_client.tagData(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            action='overwrite',
            tagName=_decryptTagName(batchState),
            files=['/mnt/lgt_decrypt'],
            metadata={},
            recursive=True,
            expand=False,
            compressDir=None)

        _log(
            batchState, 'Waiting for tagging of %s to complete - %s' %
            (_decryptTagName(batchState), tag['task_name']))

        yield _blockOnTask(
            tag['task_name'],
            cluster=batchState['pipeline_config']['cluster.CLUSTER_NAME'])

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed decrypt').progress())

        batchState['pipeline_state'] = REFERENCE_TRANSFER_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == REFERENCE_TRANSFER_STATE:
        _log(batchState, 'Pipeline is in REFERENCE_TRANSFER state')

        # Kick off one transfer per reference tag, collecting the task name
        # each command prints, then wait for all of them afterwards.
        transfers = []
        tags = (batchState['pipeline_config']['input.REF_TAG1'].split(',') +
                batchState['pipeline_config']['input.REF_TAG2'].split(','))
        for tag in tags:
            tag = tag.strip()
            output = yield _getOutput(batchState, [
                'vp-transfer-dataset', '-t', '--tag-name=' + tag,
                '--dst-cluster=' +
                batchState['pipeline_config']['cluster.CLUSTER_NAME']
            ],
                                      log=True)

            transfers.append(output['stdout'].strip())

        for task in transfers:
            yield _blockOnTask(task)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed reference_transfer').progress())

        batchState['pipeline_state'] = RUN_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUN_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUN_PIPELINE state')
        # The inner pipeline reads its input from the decrypted tag.
        batchState['pipeline_config']['input.INPUT_TAG'] = _decryptTagName(
            batchState)
        # NOTE(review): '******' looks like a redacted value -- confirm the
        # intended user name ('guest' is used by the positional calls).
        pipeline = yield pipelines_client.runPipeline(
            host='localhost',
            clusterName=batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            userName='******',
            parentPipeline=batchState['pipeline_name'],
            bareRun=True,
            queue=state.innerPipelineQueue(),
            config=batchState['pipeline_config'],
            overwrite=True)
        batchState['pipeline_task'] = pipeline['task_name']

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed run pipeline').progress())
        batchState['pipeline_state'] = RUNNING_PIPELINE_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == RUNNING_PIPELINE_STATE:
        _log(batchState, 'Pipeline is in RUNNING_PIPELINE state')
        # The monitor call's result is not yielded on; only
        # _waitForPipeline is waited for.
        _monitorPipeline(batchState)
        yield _waitForPipeline(batchState)

        yield _updateTask(
            batchState,
            lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                   'Completed running pipeline').progress())
        batchState['pipeline_state'] = HARVEST_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == HARVEST_STATE:
        _log(batchState, 'Pipeline is in HARVEST state')
        # Using prerunqueue because we want everything here serialized
        yield state.prerunQueue.addWithDeferred(_harvestTransfer, batchState)

        yield _updateTask(
            batchState, lambda t: t.addMessage(tasks.task.MSG_SILENT,
                                               'Completed harvest').progress())

        batchState['pipeline_state'] = SHUTDOWN_STATE
        state.updateBatchState()

    if batchState['pipeline_state'] == SHUTDOWN_STATE:
        _log(batchState, 'Pipeline is in SHUTDOWN state')

        # If an add-instances task was recorded, wait for it before
        # terminating; a failure there is logged and does not stop shutdown.
        if 'add_instances_task' in batchState:
            try:
                yield _blockOnTask(batchState['add_instances_task'],
                                   cluster=batchState['pipeline_config']
                                   ['cluster.CLUSTER_NAME'])
            except Exception as err:
                logging.errorPrint(str(err))
                log.err(err)

        yield clusters_client.terminateCluster(
            'localhost', batchState['pipeline_config']['cluster.CLUSTER_NAME'],
            'guest')

        yield _updateTask(
            batchState, lambda t: t.addMessage(
                tasks.task.MSG_SILENT, 'Completed shutdown').progress())

        batchState['pipeline_state'] = COMPLETED_STATE
        batchState['state'] = COMPLETED_STATE
        state.updateBatchState()