Example #1
def downloadDeferWorkQueue(urls):
    """
    Uses a DeferWorkQueue to download URLs with a dynamic number of
    concurrent downloads; here we download 10 concurrently.

    NOTE: Unlike the other examples, this one does not handle failures.
    """
    # 'http' and 'defer_work_queue' come from the surrounding project;
    # their imports are omitted in this listing.
    ret = {}

    def _setUrl(url, content):
        ret[url] = content

    def getPage(url):
        # Fetch one URL and record its content under that URL.
        return http.getPage(url,
                            connectionTimeout=30,
                            timeout=30).addCallback(lambda content: _setUrl(url, content))

    # Run at most 10 downloads at a time.
    dwq = defer_work_queue.DeferWorkQueue(10)

    for url in urls:
        dwq.add(getPage, url)

    # Fires with the url -> content mapping once every download finishes.
    return defer_work_queue.waitForCompletion(dwq).addCallback(lambda _: ret)
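A minimal sketch of driving this function from a script, assuming it sits in a module where the imports above are available; the URLs and the main wrapper are purely illustrative:

import sys

from twisted.internet import task

def main(reactor):
    d = downloadDeferWorkQueue(['http://example.com/a',
                                'http://example.com/b'])
    d.addCallback(lambda results: sys.stdout.write('%r\n' % (results,)))
    return d

task.react(main, [])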
Example #2
@defer.inlineCallbacks  # the yields below require Twisted's inlineCallbacks
def run(options):
    logging.logPrint('Starting')

    batchConfig = config.configFromStream(open(options.configFile), lazy=True)
    machineConf = config.configFromStream(open('/tmp/machine.conf'))

    # Build the batch state: validate the pipeline wrapper, interpret the
    # batch file, and pull the concurrency limits out of the config.
    state = State(options.workflowConfig,
                  options.batchStatesFile,
                  _validateWrapper(batchConfig('batch_pipeline.pipeline.PIPELINE_TEMPLATE'),
                                   pipeline_misc.determineWrapper(machineConf,
                                                                  batchConfig('batch_pipeline.pipeline.PIPELINE_TEMPLATE'))),
                  _interpretBatchFile(options.batchFile),
                  _extractInnerPipelineConfig(batchConfig),
                  batchConfig('pipeline.PIPELINE_WRAPPER_NAME'),
                  int(batchConfig('batch.options.CONCURRENT_PRERUN')),
                  int(batchConfig('batch.options.CONCURRENT_PIPELINES')),
                  int(batchConfig('batch.options.CONCURRENT_POSTRUN')))

    logging.logPrint('Queuing any incomplete work')
    queueCount = _queueIncompleteWork(state)
    logging.logPrint('Queued: %d' % queueCount)

    # Wait until every queued pipeline has run to completion.
    if state.pipelinesQueue.hasWork():
        yield defer_work_queue.waitForCompletion(state.pipelinesQueue)

    # A batch that never recorded a state, or recorded 'failed', fails the job.
    for batchState in state.batchStates.values():
        if 'state' not in batchState or batchState['state'] == 'failed':
            raise JobFailed()
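The State constructor is not shown in this listing. The sketch below is one plausible shape, inferred only from the arguments passed above and from the pipelinesQueue and batchStates attributes used later; every field name beyond those two is an assumption:

class State(object):
    # Illustrative only: inferred from the call site above, not from the
    # real source.
    def __init__(self, workflowConfig, batchStatesFile, wrapper, batches,
                 innerPipelineConfig, wrapperName,
                 concurrentPrerun, concurrentPipelines, concurrentPostrun):
        self.workflowConfig = workflowConfig
        self.batchStatesFile = batchStatesFile
        self.wrapper = wrapper
        self.batches = batches
        self.innerPipelineConfig = innerPipelineConfig
        self.wrapperName = wrapperName
        self.prerunQueue = defer_work_queue.DeferWorkQueue(concurrentPrerun)
        self.pipelinesQueue = defer_work_queue.DeferWorkQueue(concurrentPipelines)
        self.postrunQueue = defer_work_queue.DeferWorkQueue(concurrentPostrun)
        self.batchStates = {}  # batch name -> mutable state dict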
Example #3
    @defer.inlineCallbacks  # the yields below require Twisted's inlineCallbacks
    def initialize(self):
        # Create the Mongo-backed cache; documents are keyed by tag name via _id.
        self.cache = yield mongo_cache.createCache('tags_cache',
                                                   lambda d: func.updateDict(d, {'_id': d['tag_name']}))
        self.persistManager.addDependent(self)

        # If there are a lot of tags, we want to parallelize caching them.
        self.workQueue.parallel = 100

        # Force all tags to be cached.
        tags = yield self.persistManager.listTags()
        for tagName in tags:
            yield self.persistManager.loadTag(tagName)

        yield defer_work_queue.waitForCompletion(self.workQueue)

        # Now that we are done, set the concurrency back to 1.
        self.workQueue.parallel = 1
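The raise-then-restore pattern above can be hardened with try/finally so the queue returns to serial behavior even when a load fails. A minimal sketch, assuming a DeferWorkQueue-like object whose parallel attribute may be changed while work is queued; warmCache and load are hypothetical names:

from twisted.internet import defer

@defer.inlineCallbacks
def warmCache(workQueue, names, load):
    workQueue.parallel = 100
    try:
        for name in names:
            yield load(name)
        yield defer_work_queue.waitForCompletion(workQueue)
    finally:
        # Restore serial behavior no matter what happened above.
        workQueue.parallel = 1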
Example #4
@defer.inlineCallbacks  # the yields below require Twisted's inlineCallbacks
def validate(state, protocolConf, pipelineConf):
    success = []
    failure = []

    def _validateElement(elm):
        # Collect results on success and Failures on error, so one bad
        # element does not stop the rest of the queue.
        d = _validateType(state, pipelineConf, elm)
        d.addCallback(lambda v: success.extend(v))
        d.addErrback(lambda f: failure.append(f))
        return d

    def _validateElementFunc(elm):
        return lambda: _validateElement(elm)

    dwq = defer_work_queue.DeferWorkQueue(10)
    dwq.extend([_validateElementFunc(e) for e in protocolConf])
    yield defer_work_queue.waitForCompletion(dwq)

    # Re-raise the first recorded failure, if any.
    if failure:
        failure[0].raiseException()

    defer.returnValue(success)
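A hypothetical call site: the caller sees either the combined success list or, via the errback chain, the first recorded failure re-raised by raiseException(). onValidated is an illustrative name:

def onValidated(success):
    logging.logPrint('Validated %d elements' % len(success))
    return success

d = validate(state, protocolConf, pipelineConf)
d.addCallback(onValidated)
d.addErrback(lambda f: logging.logPrint('Validation failed: %s' % f.getErrorMessage()))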