def downloadDeferWorkQueue(urls):
    """
    Uses a DeferWorkQueue to download URLs with a dynamic number of concurrent
    downloads; here we will download 10 concurrently.

    NOTE: In this case we aren't handling failures like in the others
    """
    ret = {}

    def _setUrl(url, content):
        ret[url] = content

    getPage = lambda url: http.getPage(url,
                                       connectionTimeout=30,
                                       timeout=30).addCallback(lambda content: _setUrl(url, content))

    dwq = defer_work_queue.DeferWorkQueue(10)
    for url in urls:
        dwq.add(getPage, url)

    return defer_work_queue.waitForCompletion(dwq).addCallback(lambda _: ret)
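# A minimal usage sketch (not part of the original snippet): driving
# downloadDeferWorkQueue under the Twisted reactor, since DeferWorkQueue and
# http.getPage both produce Deferreds. The example URLs and the _printResults
# helper are hypothetical.
from twisted.internet import reactor

def _printResults(results):
    # results maps each URL to the content downloaded for it
    for url, content in results.items():
        print('%s: %d bytes' % (url, len(content)))

def _example():
    d = downloadDeferWorkQueue(['http://example.com/a', 'http://example.com/b'])
    d.addCallback(_printResults)
    d.addBoth(lambda _: reactor.stop())
    return d

reactor.callWhenRunning(_example)
reactor.run()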
@defer.inlineCallbacks  # assumed decorator: the yield-based Deferred flow requires twisted.internet.defer.inlineCallbacks
def run(options):
    logging.logPrint('Starting')

    batchConfig = config.configFromStream(open(options.configFile), lazy=True)
    machineConf = config.configFromStream(open('/tmp/machine.conf'))

    state = State(options.workflowConfig,
                  options.batchStatesFile,
                  _validateWrapper(batchConfig('batch_pipeline.pipeline.PIPELINE_TEMPLATE'),
                                   pipeline_misc.determineWrapper(machineConf,
                                                                  batchConfig('batch_pipeline.pipeline.PIPELINE_TEMPLATE'))),
                  _interpretBatchFile(options.batchFile),
                  _extractInnerPipelineConfig(batchConfig),
                  batchConfig('pipeline.PIPELINE_WRAPPER_NAME'),
                  int(batchConfig('batch.options.CONCURRENT_PRERUN')),
                  int(batchConfig('batch.options.CONCURRENT_PIPELINES')),
                  int(batchConfig('batch.options.CONCURRENT_POSTRUN')))

    logging.logPrint('Queuing any incomplete work')
    queueCount = _queueIncompleteWork(state)
    logging.logPrint('Queued: %d' % queueCount)

    if state.pipelinesQueue.hasWork():
        yield defer_work_queue.waitForCompletion(state.pipelinesQueue)

    for batchState in state.batchStates.values():
        if 'state' not in batchState or batchState['state'] == 'failed':
            raise JobFailed()
@defer.inlineCallbacks  # assumed decorator: the yield-based Deferred flow requires twisted.internet.defer.inlineCallbacks
def initialize(self):
    self.cache = yield mongo_cache.createCache('tags_cache',
                                               lambda d: func.updateDict(d, {'_id': d['tag_name']}))
    self.persistManager.addDependent(self)

    # If there are a lot of tags we want to parallelize caching them
    self.workQueue.parallel = 100

    # Force all tags to be cached
    tags = yield self.persistManager.listTags()
    for tagName in tags:
        yield self.persistManager.loadTag(tagName)

    yield defer_work_queue.waitForCompletion(self.workQueue)

    # Now that we are done, set it back to 1
    self.workQueue.parallel = 1
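# A sketch isolating the dynamic-concurrency pattern initialize uses above
# (assumption: DeferWorkQueue consults its parallel attribute while scheduling,
# so it can be widened for a burst of work and narrowed again afterwards). The
# _burstThrough helper and the doWork callable are hypothetical.
@defer.inlineCallbacks
def _burstThrough(workQueue, work):
    workQueue.parallel = 100       # widen the queue for the burst
    for w in work:
        workQueue.add(doWork, w)   # doWork is assumed to return a Deferred
    yield defer_work_queue.waitForCompletion(workQueue)
    workQueue.parallel = 1         # restore the default width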
@defer.inlineCallbacks  # assumed decorator: yield plus defer.returnValue requires twisted.internet.defer.inlineCallbacks
def validate(state, protocolConf, pipelineConf):
    success = []
    failure = []

    def _validateElement(elm):
        d = _validateType(state, pipelineConf, elm)
        d.addCallback(lambda v: success.extend(v))
        d.addErrback(lambda f: failure.append(f))
        return d

    def _validateElementFunc(elm):
        return lambda: _validateElement(elm)

    dwq = defer_work_queue.DeferWorkQueue(10)
    dwq.extend([_validateElementFunc(e) for e in protocolConf])
    yield defer_work_queue.waitForCompletion(dwq)

    if failure:
        failure[0].raiseException()

    defer.returnValue(success)
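# For contrast with the NOTE in downloadDeferWorkQueue above, a sketch applying
# validate's collect-and-reraise failure pattern to the same download. This is
# an illustrative variant, not code from the original project.
def downloadCollectingFailures(urls):
    ret = {}
    failures = []

    def _getPage(url):
        d = http.getPage(url, connectionTimeout=30, timeout=30)
        d.addCallback(lambda content: ret.update({url: content}))
        d.addErrback(lambda f: failures.append(f))
        return d

    dwq = defer_work_queue.DeferWorkQueue(10)
    for url in urls:
        dwq.add(_getPage, url)

    def _finish(_):
        if failures:
            # Reraise the first failure, as validate does above
            failures[0].raiseException()
        return ret

    return defer_work_queue.waitForCompletion(dwq).addCallback(_finish)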